Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 607b102

Browse files
authored
bpo-40521: Disable Unicode caches in isolated subinterpreters (GH-19933)
When Python is built in the experimental isolated subinterpreters mode, disable Unicode singletons and Unicode interned strings since they are shared by all interpreters. Temporary workaround until these caches are made per-interpreter.
1 parent 299b8c6 commit 607b102
Copy full SHA for 607b102

File tree

Expand file treeCollapse file tree

2 files changed

+79
-15
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+79
-15
lines changed

‎Objects/typeobject.c

Copy file name to clipboardExpand all lines: Objects/typeobject.c
+16Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ static size_t method_cache_misses = 0;
5656
static size_t method_cache_collisions = 0;
5757
#endif
5858

59+
/* bpo-40521: Interned strings are shared by all interpreters. */
60+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
61+
# define INTERN_NAME_STRINGS
62+
#endif
63+
5964
/* alphabetical order */
6065
_Py_IDENTIFIER(__abstractmethods__);
6166
_Py_IDENTIFIER(__class__);
@@ -3418,6 +3423,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
34183423
if (name == NULL)
34193424
return -1;
34203425
}
3426+
#ifdef INTERN_NAME_STRINGS
34213427
if (!PyUnicode_CHECK_INTERNED(name)) {
34223428
PyUnicode_InternInPlace(&name);
34233429
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3427,6 +3433,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
34273433
return -1;
34283434
}
34293435
}
3436+
#endif
34303437
}
34313438
else {
34323439
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -7531,10 +7538,17 @@ _PyTypes_InitSlotDefs(void)
75317538
for (slotdef *p = slotdefs; p->name; p++) {
75327539
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
75337540
assert(!p[1].name || p->offset <= p[1].offset);
7541+
#ifdef INTERN_NAME_STRINGS
75347542
p->name_strobj = PyUnicode_InternFromString(p->name);
75357543
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
75367544
return _PyStatus_NO_MEMORY();
75377545
}
7546+
#else
7547+
p->name_strobj = PyUnicode_FromString(p->name);
7548+
if (!p->name_strobj) {
7549+
return _PyStatus_NO_MEMORY();
7550+
}
7551+
#endif
75387552
}
75397553
slotdefs_initialized = 1;
75407554
return _PyStatus_OK();
@@ -7559,7 +7573,9 @@ update_slot(PyTypeObject *type, PyObject *name)
75597573
int offset;
75607574

75617575
assert(PyUnicode_CheckExact(name));
7576+
#ifdef INTERN_NAME_STRINGS
75627577
assert(PyUnicode_CHECK_INTERNED(name));
7578+
#endif
75637579

75647580
assert(slotdefs_initialized);
75657581
pp = ptrs;

‎Objects/unicodeobject.c

Copy file name to clipboardExpand all lines: Objects/unicodeobject.c
+63-15Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ extern "C" {
198198
# define OVERALLOCATE_FACTOR 4
199199
#endif
200200

201+
/* bpo-40521: Interned strings are shared by all interpreters. */
202+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
203+
# define INTERNED_STRINGS
204+
#endif
205+
201206
/* This dictionary holds all interned unicode strings. Note that references
202207
to strings in this dictionary are *not* counted in the string's ob_refcnt.
203208
When the interned string reaches a refcnt of 0 the string deallocation
@@ -206,7 +211,9 @@ extern "C" {
206211
Another way to look at this is to say that the actual reference
207212
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
208213
*/
214+
#ifdef INTERNED_STRINGS
209215
static PyObject *interned = NULL;
216+
#endif
210217

211218
/* The empty Unicode object is shared to improve performance. */
212219
static PyObject *unicode_empty = NULL;
@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
281288
/* List of static strings. */
282289
static _Py_Identifier *static_strings = NULL;
283290

291+
/* bpo-40521: Latin1 singletons are shared by all interpreters. */
292+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
293+
# define LATIN1_SINGLETONS
294+
#endif
295+
296+
#ifdef LATIN1_SINGLETONS
284297
/* Single character Unicode strings in the Latin-1 range are being
285298
shared as well. */
286299
static PyObject *unicode_latin1[256] = {NULL};
300+
#endif
287301

288302
/* Fast detection of the most frequent whitespace characters */
289303
const unsigned char _Py_ascii_whitespace[] = {
@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
662676
return unicode_empty;
663677
}
664678

679+
#ifdef LATIN1_SINGLETONS
665680
if (length == 1) {
666681
const void *data = PyUnicode_DATA(unicode);
667682
int kind = PyUnicode_KIND(unicode);
@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
683698
}
684699
}
685700
}
701+
#endif
686702

687703
assert(_PyUnicode_CheckConsistency(unicode, 1));
688704
return unicode;
@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
19131929
case SSTATE_INTERNED_MORTAL:
19141930
/* revive dead object temporarily for DelItem */
19151931
Py_SET_REFCNT(unicode, 3);
1932+
#ifdef INTERNED_STRINGS
19161933
if (PyDict_DelItem(interned, unicode) != 0) {
19171934
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19181935
NULL);
19191936
}
1937+
#endif
19201938
break;
19211939

19221940
case SSTATE_INTERNED_IMMORTAL:
@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
19441962
static int
19451963
unicode_is_singleton(PyObject *unicode)
19461964
{
1947-
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
1948-
if (unicode == unicode_empty)
1965+
if (unicode == unicode_empty) {
19491966
return 1;
1967+
}
1968+
#ifdef LATIN1_SINGLETONS
1969+
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
19501970
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
19511971
{
19521972
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
19531973
if (ch < 256 && unicode_latin1[ch] == unicode)
19541974
return 1;
19551975
}
1976+
#endif
19561977
return 0;
19571978
}
19581979
#endif
@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
20942115
static PyObject*
20952116
get_latin1_char(unsigned char ch)
20962117
{
2097-
PyObject *unicode = unicode_latin1[ch];
2118+
PyObject *unicode;
2119+
2120+
#ifdef LATIN1_SINGLETONS
2121+
unicode = unicode_latin1[ch];
2122+
if (unicode) {
2123+
Py_INCREF(unicode);
2124+
return unicode;
2125+
}
2126+
#endif
2127+
2128+
unicode = PyUnicode_New(1, ch);
20982129
if (!unicode) {
2099-
unicode = PyUnicode_New(1, ch);
2100-
if (!unicode)
2101-
return NULL;
2102-
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
2103-
assert(_PyUnicode_CheckConsistency(unicode, 1));
2104-
unicode_latin1[ch] = unicode;
2130+
return NULL;
21052131
}
2132+
2133+
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
2134+
assert(_PyUnicode_CheckConsistency(unicode, 1));
2135+
2136+
#ifdef LATIN1_SINGLETONS
21062137
Py_INCREF(unicode);
2138+
unicode_latin1[ch] = unicode;
2139+
#endif
21072140
return unicode;
21082141
}
21092142

@@ -11270,7 +11303,6 @@ int
1127011303
_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1127111304
{
1127211305
PyObject *right_uni;
11273-
Py_hash_t hash;
1127411306

1127511307
assert(_PyUnicode_CHECK(left));
1127611308
assert(right->string);
@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1130211334
if (PyUnicode_CHECK_INTERNED(left))
1130311335
return 0;
1130411336

11337+
#ifdef INTERNED_STRINGS
1130511338
assert(_PyUnicode_HASH(right_uni) != -1);
11306-
hash = _PyUnicode_HASH(left);
11339+
Py_hash_t hash = _PyUnicode_HASH(left);
1130711340
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
1130811341
return 0;
11342+
#endif
1130911343

1131011344
return unicode_compare_eq(left, right_uni);
1131111345
}
@@ -15487,43 +15521,55 @@ void
1548715521
PyUnicode_InternInPlace(PyObject **p)
1548815522
{
1548915523
PyObject *s = *p;
15490-
PyObject *t;
1549115524
#ifdef Py_DEBUG
1549215525
assert(s != NULL);
1549315526
assert(_PyUnicode_CHECK(s));
1549415527
#else
15495-
if (s == NULL || !PyUnicode_Check(s))
15528+
if (s == NULL || !PyUnicode_Check(s)) {
1549615529
return;
15530+
}
1549715531
#endif
15532+
1549815533
/* If it's a subclass, we don't really know what putting
1549915534
it in the interned dict might do. */
15500-
if (!PyUnicode_CheckExact(s))
15535+
if (!PyUnicode_CheckExact(s)) {
1550115536
return;
15502-
if (PyUnicode_CHECK_INTERNED(s))
15537+
}
15538+
15539+
if (PyUnicode_CHECK_INTERNED(s)) {
1550315540
return;
15541+
}
15542+
15543+
#ifdef INTERNED_STRINGS
1550415544
if (interned == NULL) {
1550515545
interned = PyDict_New();
1550615546
if (interned == NULL) {
1550715547
PyErr_Clear(); /* Don't leave an exception */
1550815548
return;
1550915549
}
1551015550
}
15551+
15552+
PyObject *t;
1551115553
Py_ALLOW_RECURSION
1551215554
t = PyDict_SetDefault(interned, s, s);
1551315555
Py_END_ALLOW_RECURSION
15556+
1551415557
if (t == NULL) {
1551515558
PyErr_Clear();
1551615559
return;
1551715560
}
15561+
1551815562
if (t != s) {
1551915563
Py_INCREF(t);
1552015564
Py_SETREF(*p, t);
1552115565
return;
1552215566
}
15567+
1552315568
/* The two references in interned are not counted by refcnt.
1552415569
The deallocator will take care of this */
1552515570
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1552615571
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15572+
#endif
1552715573
}
1552815574

1552915575
void
@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
1610916155

1611016156
Py_CLEAR(unicode_empty);
1611116157

16158+
#ifdef LATIN1_SINGLETONS
1611216159
for (Py_ssize_t i = 0; i < 256; i++) {
1611316160
Py_CLEAR(unicode_latin1[i]);
1611416161
}
16162+
#endif
1611516163
_PyUnicode_ClearStaticStrings();
1611616164
}
1611716165

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.