@@ -198,6 +198,11 @@ extern "C" {
198
198
# define OVERALLOCATE_FACTOR 4
199
199
#endif
200
200
201
/* bpo-40521: Interned strings are shared by all interpreters, so string
   interning is compiled in only when EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
   is not defined. */
#if !defined(EXPERIMENTAL_ISOLATED_SUBINTERPRETERS)
# define INTERNED_STRINGS
#endif
205
+
201
206
/* This dictionary holds all interned unicode strings. Note that references
202
207
to strings in this dictionary are *not* counted in the string's ob_refcnt.
203
208
When the interned string reaches a refcnt of 0 the string deallocation
@@ -206,7 +211,9 @@ extern "C" {
206
211
Another way to look at this is to say that the actual reference
207
212
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
208
213
*/
214
+ #ifdef INTERNED_STRINGS
209
215
static PyObject * interned = NULL ;
216
+ #endif
210
217
211
218
/* The empty Unicode object is shared to improve performance. */
212
219
static PyObject * unicode_empty = NULL ;
@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
281
288
/* List of static strings. */
282
289
static _Py_Identifier * static_strings = NULL ;
283
290
291
+ /* bpo-40521: Latin1 singletons are shared by all interpreters. */
292
+ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
293
+ # define LATIN1_SINGLETONS
294
+ #endif
295
+
296
+ #ifdef LATIN1_SINGLETONS
284
297
/* Single character Unicode strings in the Latin-1 range are being
285
298
shared as well. */
286
299
static PyObject * unicode_latin1 [256 ] = {NULL };
300
+ #endif
287
301
288
302
/* Fast detection of the most frequent whitespace characters */
289
303
const unsigned char _Py_ascii_whitespace [] = {
@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
662
676
return unicode_empty ;
663
677
}
664
678
679
+ #ifdef LATIN1_SINGLETONS
665
680
if (length == 1 ) {
666
681
const void * data = PyUnicode_DATA (unicode );
667
682
int kind = PyUnicode_KIND (unicode );
@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
683
698
}
684
699
}
685
700
}
701
+ #endif
686
702
687
703
assert (_PyUnicode_CheckConsistency (unicode , 1 ));
688
704
return unicode ;
@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
1913
1929
case SSTATE_INTERNED_MORTAL :
1914
1930
/* revive dead object temporarily for DelItem */
1915
1931
Py_SET_REFCNT (unicode , 3 );
1932
+ #ifdef INTERNED_STRINGS
1916
1933
if (PyDict_DelItem (interned , unicode ) != 0 ) {
1917
1934
_PyErr_WriteUnraisableMsg ("deletion of interned string failed" ,
1918
1935
NULL );
1919
1936
}
1937
+ #endif
1920
1938
break ;
1921
1939
1922
1940
case SSTATE_INTERNED_IMMORTAL :
@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
1944
1962
static int
1945
1963
unicode_is_singleton (PyObject * unicode )
1946
1964
{
1947
- PyASCIIObject * ascii = (PyASCIIObject * )unicode ;
1948
- if (unicode == unicode_empty )
1965
+ if (unicode == unicode_empty ) {
1949
1966
return 1 ;
1967
+ }
1968
+ #ifdef LATIN1_SINGLETONS
1969
+ PyASCIIObject * ascii = (PyASCIIObject * )unicode ;
1950
1970
if (ascii -> state .kind != PyUnicode_WCHAR_KIND && ascii -> length == 1 )
1951
1971
{
1952
1972
Py_UCS4 ch = PyUnicode_READ_CHAR (unicode , 0 );
1953
1973
if (ch < 256 && unicode_latin1 [ch ] == unicode )
1954
1974
return 1 ;
1955
1975
}
1976
+ #endif
1956
1977
return 0 ;
1957
1978
}
1958
1979
#endif
@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
2094
2115
static PyObject *
2095
2116
get_latin1_char (unsigned char ch )
2096
2117
{
2097
- PyObject * unicode = unicode_latin1 [ch ];
2118
+ PyObject * unicode ;
2119
+
2120
+ #ifdef LATIN1_SINGLETONS
2121
+ unicode = unicode_latin1 [ch ];
2122
+ if (unicode ) {
2123
+ Py_INCREF (unicode );
2124
+ return unicode ;
2125
+ }
2126
+ #endif
2127
+
2128
+ unicode = PyUnicode_New (1 , ch );
2098
2129
if (!unicode ) {
2099
- unicode = PyUnicode_New (1 , ch );
2100
- if (!unicode )
2101
- return NULL ;
2102
- PyUnicode_1BYTE_DATA (unicode )[0 ] = ch ;
2103
- assert (_PyUnicode_CheckConsistency (unicode , 1 ));
2104
- unicode_latin1 [ch ] = unicode ;
2130
+ return NULL ;
2105
2131
}
2132
+
2133
+ PyUnicode_1BYTE_DATA (unicode )[0 ] = ch ;
2134
+ assert (_PyUnicode_CheckConsistency (unicode , 1 ));
2135
+
2136
+ #ifdef LATIN1_SINGLETONS
2106
2137
Py_INCREF (unicode );
2138
+ unicode_latin1 [ch ] = unicode ;
2139
+ #endif
2107
2140
return unicode ;
2108
2141
}
2109
2142
@@ -11270,7 +11303,6 @@ int
11270
11303
_PyUnicode_EqualToASCIIId (PyObject * left , _Py_Identifier * right )
11271
11304
{
11272
11305
PyObject * right_uni ;
11273
- Py_hash_t hash ;
11274
11306
11275
11307
assert (_PyUnicode_CHECK (left ));
11276
11308
assert (right -> string );
@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
11302
11334
if (PyUnicode_CHECK_INTERNED (left ))
11303
11335
return 0 ;
11304
11336
11337
+ #ifdef INTERNED_STRINGS
11305
11338
assert (_PyUnicode_HASH (right_uni ) != -1 );
11306
- hash = _PyUnicode_HASH (left );
11339
+ Py_hash_t hash = _PyUnicode_HASH (left );
11307
11340
if (hash != -1 && hash != _PyUnicode_HASH (right_uni ))
11308
11341
return 0 ;
11342
+ #endif
11309
11343
11310
11344
return unicode_compare_eq (left , right_uni );
11311
11345
}
@@ -15487,43 +15521,55 @@ void
15487
15521
PyUnicode_InternInPlace(PyObject **p)
{
    /* Intern the string referenced by *p, in place.

       If the interned dict already stores a different object for this key,
       *p is replaced by a new reference to that object.  Otherwise *p itself
       is marked as interned (mortal).  Nothing is done for NULL, non-str
       objects, str subclasses or strings that are already interned.  Errors
       are cleared rather than reported, leaving *p unchanged.  Without
       INTERNED_STRINGS, no interning is performed. */
    PyObject *str = *p;

#ifdef Py_DEBUG
    assert(str != NULL);
    assert(_PyUnicode_CHECK(str));
#else
    if (str == NULL || !PyUnicode_Check(str)) {
        return;
    }
#endif

    /* If it's a subclass, we don't really know what putting it in the
       interned dict might do.  Already interned strings need no work. */
    if (!PyUnicode_CheckExact(str) || PyUnicode_CHECK_INTERNED(str)) {
        return;
    }

#ifdef INTERNED_STRINGS
    /* Create the interned dict on first use. */
    if (interned == NULL) {
        interned = PyDict_New();
        if (interned == NULL) {
            PyErr_Clear(); /* Don't leave an exception */
            return;
        }
    }

    /* Declared before Py_ALLOW_RECURSION so that it is still in scope after
       Py_END_ALLOW_RECURSION. */
    PyObject *canonical;
    Py_ALLOW_RECURSION
    canonical = PyDict_SetDefault(interned, str, str);
    Py_END_ALLOW_RECURSION

    if (canonical == NULL) {
        PyErr_Clear();
        return;
    }

    if (canonical == str) {
        /* The dict now maps str to itself.  The two references held by the
           interned dict (key and value) are not counted by refcnt.
           The deallocator will take care of this */
        Py_SET_REFCNT(str, Py_REFCNT(str) - 2);
        _PyUnicode_STATE(str).interned = SSTATE_INTERNED_MORTAL;
    }
    else {
        /* A different object is already stored for this key: give the
           caller a reference to that one instead. */
        Py_INCREF(canonical);
        Py_SETREF(*p, canonical);
    }
#endif
}
15528
15574
15529
15575
void
@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
16109
16155
16110
16156
Py_CLEAR (unicode_empty );
16111
16157
16158
+ #ifdef LATIN1_SINGLETONS
16112
16159
for (Py_ssize_t i = 0 ; i < 256 ; i ++ ) {
16113
16160
Py_CLEAR (unicode_latin1 [i ]);
16114
16161
}
16162
+ #endif
16115
16163
_PyUnicode_ClearStaticStrings ();
16116
16164
}
16117
16165
0 commit comments