From ed1b0f49570e2f843f3ce2639ea7d6d17f323e83 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 5 Jul 2021 19:00:39 +0100 Subject: [PATCH 01/23] bpo-26280: Port BINARY_SUBSCR to PEP 659 adaptive interpreter --- Include/internal/pycore_code.h | 1 + Include/opcode.h | 22 ++++++---- Lib/opcode.py | 4 ++ Python/ceval.c | 75 ++++++++++++++++++++++++++++++++++ Python/opcode_targets.h | 22 +++++----- Python/specialize.c | 35 ++++++++++++++++ 6 files changed, 139 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index f5e814ddbd799a..f0da7cc5adf98c 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -321,6 +321,7 @@ cache_backoff(_PyAdaptiveEntry *entry) { int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define SPECIALIZATION_STATS 0 #define SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index 7f8376ff15ba95..c5a486ba47b131 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -136,15 +136,19 @@ extern "C" { #define DICT_MERGE 164 #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 -#define JUMP_ABSOLUTE_QUICK 7 -#define LOAD_ATTR_ADAPTIVE 8 -#define LOAD_ATTR_SPLIT_KEYS 13 -#define LOAD_ATTR_WITH_HINT 14 -#define LOAD_ATTR_SLOT 18 -#define LOAD_ATTR_MODULE 21 -#define LOAD_GLOBAL_ADAPTIVE 36 -#define LOAD_GLOBAL_MODULE 38 -#define LOAD_GLOBAL_BUILTIN 39 +#define BINARY_SUBSCR_ADAPTIVE 7 +#define BINARY_SUBSCR_LIST 8 +#define BINARY_SUBSCR_TUPLE 13 +#define BINARY_SUBSCR_DICT 14 +#define JUMP_ABSOLUTE_QUICK 18 +#define LOAD_ATTR_ADAPTIVE 21 +#define LOAD_ATTR_SPLIT_KEYS 36 +#define LOAD_ATTR_WITH_HINT 38 +#define LOAD_ATTR_SLOT 
39 +#define LOAD_ATTR_MODULE 40 +#define LOAD_GLOBAL_ADAPTIVE 41 +#define LOAD_GLOBAL_MODULE 42 +#define LOAD_GLOBAL_BUILTIN 43 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 7e5916a4245256..ef4216ce9462c4 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -220,6 +220,10 @@ def jabs_op(name, op): del def_op, name_op, jrel_op, jabs_op _specialized_instructions = [ + "BINARY_SUBSCR_ADAPTIVE", + "BINARY_SUBSCR_LIST", + "BINARY_SUBSCR_TUPLE", + "BINARY_SUBSCR_DICT", "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_SPLIT_KEYS", diff --git a/Python/ceval.c b/Python/ceval.c index 22184058af2d4b..e55505c394ecc8 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1913,6 +1913,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(BINARY_SUBSCR): { + PREDICTED(BINARY_SUBSCR); PyObject *sub = POP(); PyObject *container = TOP(); PyObject *res = PyObject_GetItem(container, sub); @@ -1924,6 +1925,79 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } + case TARGET(BINARY_SUBSCR_ADAPTIVE): { + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *sub = TOP(); + PyObject *container = SECOND(); + next_instr--; + if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) { + goto error; + } + DISPATCH(); + } + else { + STAT_INC(BINARY_SUBSCR, deferred); + cache->adaptive.counter--; + JUMP_TO_INSTRUCTION(BINARY_SUBSCR); + } + } + + case TARGET(BINARY_SUBSCR_LIST): { + PyObject *sub = TOP(); + PyObject *list = SECOND(); + DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); + DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); + Py_ssize_t index = PyLong_AsSsize_t(sub); + DEOPT_IF(index < 0 || index >= PyList_Size(list), BINARY_SUBSCR); + POP(); + PyObject *res = PyList_GetItem(list, index); + Py_XINCREF(res); + Py_DECREF(list); + Py_DECREF(sub); + 
SET_TOP(res); + if (res == NULL) + goto error; + DISPATCH(); + } + + case TARGET(BINARY_SUBSCR_TUPLE): { + PyObject *sub = TOP(); + PyObject *tuple = SECOND(); + DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); + DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); + Py_ssize_t index = PyLong_AsSsize_t(sub); + DEOPT_IF(index < 0 || index >= PyTuple_Size(tuple), BINARY_SUBSCR); + POP(); + PyObject *res = PyTuple_GetItem(tuple, index); + Py_XINCREF(res); + Py_DECREF(tuple); + Py_DECREF(sub); + SET_TOP(res); + if (res == NULL) + goto error; + DISPATCH(); + } + + case TARGET(BINARY_SUBSCR_DICT): { + DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR); + PyObject *sub = POP(); + PyObject *dict = TOP(); + PyObject *res = PyDict_GetItemWithError(dict, sub); + Py_XINCREF(res); + Py_DECREF(dict); + SET_TOP(res); + if (res == NULL) { + if (!_PyErr_Occurred(tstate)) { + _PyErr_SetKeyError(sub); + } + Py_DECREF(sub); + goto error; + } + Py_DECREF(sub); + DISPATCH(); + } + case TARGET(BINARY_LSHIFT): { PyObject *right = POP(); PyObject *left = TOP(); @@ -4329,6 +4403,7 @@ opname ## _miss: \ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) +MISS_WITH_CACHE(BINARY_SUBSCR) error: /* Double-check exception status. 
*/ diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index ecc95dabf4693e..f1464f30ace280 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -6,21 +6,21 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP, &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, - &&TARGET_JUMP_ABSOLUTE_QUICK, - &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_LIST, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, - &&TARGET_LOAD_ATTR_SPLIT_KEYS, - &&TARGET_LOAD_ATTR_WITH_HINT, + &&TARGET_BINARY_SUBSCR_TUPLE, + &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, - &&TARGET_LOAD_ATTR_SLOT, + &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_LOAD_ATTR_MODULE, + &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,8 +35,12 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, + &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_LOAD_ATTR_WITH_HINT, + &&TARGET_LOAD_ATTR_SLOT, + &&TARGET_LOAD_ATTR_MODULE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&_unknown_opcode, @@ -44,10 +48,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, diff --git a/Python/specialize.c b/Python/specialize.c index 3277c6bc9e4894..33b1e02f50972e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -162,12 +162,14 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) { static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, + 
[BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ + [BINARY_SUBSCR] = 1, /* _PyAdaptiveEntry */ }; /* Return the oparg for the cache_offset and instruction index. @@ -637,3 +639,36 @@ _Py_Specialize_LoadGlobal( cache0->counter = saturating_start(); return 0; } + +int +_Py_Specialize_BinarySubscr( + PyObject *container, PyObject *sub, + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *cache0 = &cache->adaptive; + if (PyList_CheckExact(container) && PyLong_CheckExact(sub)) { + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST, _Py_OPARG(*instr)); + goto success; + } + if (PyTuple_CheckExact(container) && PyLong_CheckExact(sub)) { + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE, _Py_OPARG(*instr)); + goto success; + } + if (PyDict_CheckExact(container)) { + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr)); + goto success; + } + + goto fail; +fail: + STAT_INC(BINARY_SUBSCR, specialization_failure); + assert(!PyErr_Occurred()); + cache_backoff(cache0); + return 0; +success: + STAT_INC(BINARY_SUBSCR, specialization_success); + assert(!PyErr_Occurred()); + cache0->counter = saturating_start(); + return 0; +} + From 99219b0a9ad39d3f6d67a34725169f830c499b89 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 6 Jul 2021 14:47:24 +0100 Subject: [PATCH 02/23] POP() -->STACK_SHRINK(1) --- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index e55505c394ecc8..476916d332703d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1950,7 +1950,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); Py_ssize_t index = PyLong_AsSsize_t(sub); DEOPT_IF(index < 0 || 
index >= PyList_Size(list), BINARY_SUBSCR); - POP(); + STACK_SHRINK(1); PyObject *res = PyList_GetItem(list, index); Py_XINCREF(res); Py_DECREF(list); @@ -1968,7 +1968,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); Py_ssize_t index = PyLong_AsSsize_t(sub); DEOPT_IF(index < 0 || index >= PyTuple_Size(tuple), BINARY_SUBSCR); - POP(); + STACK_SHRINK(1); PyObject *res = PyTuple_GetItem(tuple, index); Py_XINCREF(res); Py_DECREF(tuple); From f051c194df982004d3b41f37903b5fb5dd38ac45 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 7 Jul 2021 14:07:09 +0100 Subject: [PATCH 03/23] revert change to MAGIC_NUMBER --- Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index d624b73f38eb66..9e7e77bf5f9ad4 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -372,7 +372,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. 
-MAGIC_NUMBER = (3460).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3459).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' From 6401da8f59b37d94d2ac00ff30c465741aed219b Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 7 Jul 2021 14:20:24 +0100 Subject: [PATCH 04/23] add STAT_INCs --- Python/ceval.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 476916d332703d..13aeec74a8d1ae 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1914,6 +1914,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(BINARY_SUBSCR): { PREDICTED(BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, unquickened); PyObject *sub = POP(); PyObject *container = TOP(); PyObject *res = PyObject_GetItem(container, sub); @@ -1950,6 +1951,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); Py_ssize_t index = PyLong_AsSsize_t(sub); DEOPT_IF(index < 0 || index >= PyList_Size(list), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); STACK_SHRINK(1); PyObject *res = PyList_GetItem(list, index); Py_XINCREF(res); @@ -1968,6 +1970,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); Py_ssize_t index = PyLong_AsSsize_t(sub); DEOPT_IF(index < 0 || index >= PyTuple_Size(tuple), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); STACK_SHRINK(1); PyObject *res = PyTuple_GetItem(tuple, index); Py_XINCREF(res); @@ -1981,6 +1984,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) case TARGET(BINARY_SUBSCR_DICT): { DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); PyObject *sub = POP(); PyObject *dict = TOP(); PyObject *res = PyDict_GetItemWithError(dict, sub); From 95f85d20291cace0e47d8cf75ae2dc632911627a Mon Sep 17 
00:00:00 2001 From: Irit Katriel Date: Wed, 7 Jul 2021 14:23:42 +0100 Subject: [PATCH 05/23] use unchecked GET_ITEM --- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 13aeec74a8d1ae..e2ce53c24c8d3b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1953,7 +1953,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(index < 0 || index >= PyList_Size(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); STACK_SHRINK(1); - PyObject *res = PyList_GetItem(list, index); + PyObject *res = PyList_GET_ITEM(list, index); Py_XINCREF(res); Py_DECREF(list); Py_DECREF(sub); @@ -1972,7 +1972,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(index < 0 || index >= PyTuple_Size(tuple), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); STACK_SHRINK(1); - PyObject *res = PyTuple_GetItem(tuple, index); + PyObject *res = PyTuple_GET_ITEM(tuple, index); Py_XINCREF(res); Py_DECREF(tuple); Py_DECREF(sub); From f6337bef245bcd96da8207e1b36e372c8608b863 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 7 Jul 2021 15:06:57 +0100 Subject: [PATCH 06/23] add SPECIALIZATION_FAIL stats --- Python/specialize.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 33b1e02f50972e..082275938dde74 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -646,19 +646,28 @@ _Py_Specialize_BinarySubscr( _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { _PyAdaptiveEntry *cache0 = &cache->adaptive; - if (PyList_CheckExact(container) && PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST, _Py_OPARG(*instr)); - goto success; + if (PyList_CheckExact(container)) { + if (PyLong_CheckExact(sub)) { + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST, _Py_OPARG(*instr)); + goto success; + } else { + SPECIALIZATION_FAIL(BINARY_SUBSCR, 
PyType(container), sub, "list; non-integer subscr"); + } } - if (PyTuple_CheckExact(container) && PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE, _Py_OPARG(*instr)); - goto success; + if (PyTuple_CheckExact(container)) { + if (PyLong_CheckExact(sub)) { + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE, _Py_OPARG(*instr)); + goto success; + } else { + SPECIALIZATION_FAIL(BINARY_SUBSCR, PyType(container), sub, "tuple; non-integer subscr"); + } } if (PyDict_CheckExact(container)) { *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr)); goto success; } + SPECIALIZATION_FAIL(BINARY_SUBSCR, PyType(container), sub, "not list|tuple|dict"); goto fail; fail: STAT_INC(BINARY_SUBSCR, specialization_failure); From dc45cac198f4625c5b7096dacc66a98107766388 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 7 Jul 2021 15:51:59 +0100 Subject: [PATCH 07/23] print BINARY_SUBSCR stats --- Python/specialize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/specialize.c b/Python/specialize.c index 082275938dde74..542d6ee535fa77 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -78,6 +78,7 @@ _Py_PrintSpecializationStats(void) printf("Specialization stats:\n"); print_stats(&_specialization_stats[LOAD_ATTR], "load_attr"); print_stats(&_specialization_stats[LOAD_GLOBAL], "load_global"); + print_stats(&_specialization_stats[BINARY_SUBSCR], "binary_subscr"); } #if SPECIALIZATION_STATS_DETAILED From 09f8c509aeffeb487780a337182e25aed405ac4a Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 7 Jul 2021 16:05:21 +0100 Subject: [PATCH 08/23] fix SPECIALIZATION_FAIL --- Python/specialize.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 542d6ee535fa77..b86f955b095d0e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -652,7 +652,7 @@ _Py_Specialize_BinarySubscr( *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST, _Py_OPARG(*instr)); goto success; } else { - 
SPECIALIZATION_FAIL(BINARY_SUBSCR, PyType(container), sub, "list; non-integer subscr"); + SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "list; non-integer subscr"); } } if (PyTuple_CheckExact(container)) { @@ -660,7 +660,7 @@ _Py_Specialize_BinarySubscr( *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE, _Py_OPARG(*instr)); goto success; } else { - SPECIALIZATION_FAIL(BINARY_SUBSCR, PyType(container), sub, "tuple; non-integer subscr"); + SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "tuple; non-integer subscr"); } } if (PyDict_CheckExact(container)) { @@ -668,7 +668,7 @@ _Py_Specialize_BinarySubscr( goto success; } - SPECIALIZATION_FAIL(BINARY_SUBSCR, PyType(container), sub, "not list|tuple|dict"); + SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "not list|tuple|dict"); goto fail; fail: STAT_INC(BINARY_SUBSCR, specialization_failure); From 1efce2e4f7f51be4655656a3bf61da7aaed5923d Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 8 Jul 2021 12:24:12 +0100 Subject: [PATCH 09/23] optimise bytemode implementations --- Python/ceval.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index e2ce53c24c8d3b..a01dbf04580c41 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1952,14 +1952,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) Py_ssize_t index = PyLong_AsSsize_t(sub); DEOPT_IF(index < 0 || index >= PyList_Size(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - STACK_SHRINK(1); PyObject *res = PyList_GET_ITEM(list, index); - Py_XINCREF(res); - Py_DECREF(list); + assert(res != NULL); + Py_INCREF(res); + STACK_SHRINK(1); Py_DECREF(sub); SET_TOP(res); - if (res == NULL) - goto error; + Py_DECREF(list); DISPATCH(); } @@ -1971,26 +1970,30 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) Py_ssize_t index = PyLong_AsSsize_t(sub); DEOPT_IF(index < 0 || index >= 
PyTuple_Size(tuple), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - STACK_SHRINK(1); PyObject *res = PyTuple_GET_ITEM(tuple, index); - Py_XINCREF(res); - Py_DECREF(tuple); + assert(res != NULL); + Py_INCREF(res); + STACK_SHRINK(1); Py_DECREF(sub); SET_TOP(res); - if (res == NULL) - goto error; + Py_DECREF(tuple); DISPATCH(); } case TARGET(BINARY_SUBSCR_DICT): { + PyObject *dict = SECOND(); DEOPT_IF(!PyDict_CheckExact(SECOND()), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); - PyObject *sub = POP(); - PyObject *dict = TOP(); + PyObject *sub = TOP(); PyObject *res = PyDict_GetItemWithError(dict, sub); - Py_XINCREF(res); - Py_DECREF(dict); + if (res == NULL) { + goto binary_subscr_error; + } + Py_INCREF(res); + STACK_SHRINK(1); + Py_DECREF(sub); SET_TOP(res); + Py_DECREF(dict); if (res == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); @@ -1998,7 +2001,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) Py_DECREF(sub); goto error; } - Py_DECREF(sub); DISPATCH(); } @@ -4409,6 +4411,15 @@ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(BINARY_SUBSCR) +binary_subscr_error: + { + PyObject *sub = POP(); + if (!_PyErr_Occurred(tstate)) { + _PyErr_SetKeyError(sub); + } + Py_DECREF(sub); + } + error: /* Double-check exception status. */ #ifdef NDEBUG From d32bfc9cbc198d86e9622fd9cad50806d1e7857a Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 13 Jul 2021 11:00:54 +0100 Subject: [PATCH 10/23] more precise opcode and label names. 
Added (currently redundant) goto error at end of error handler --- Include/opcode.h | 4 ++-- Lib/opcode.py | 4 ++-- Python/ceval.c | 9 +++++---- Python/opcode_targets.h | 4 ++-- Python/specialize.c | 4 ++-- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index c5a486ba47b131..7bebb871edb444 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -137,8 +137,8 @@ extern "C" { #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 #define BINARY_SUBSCR_ADAPTIVE 7 -#define BINARY_SUBSCR_LIST 8 -#define BINARY_SUBSCR_TUPLE 13 +#define BINARY_SUBSCR_LIST_INT 8 +#define BINARY_SUBSCR_TUPLE_INT 13 #define BINARY_SUBSCR_DICT 14 #define JUMP_ABSOLUTE_QUICK 18 #define LOAD_ATTR_ADAPTIVE 21 diff --git a/Lib/opcode.py b/Lib/opcode.py index ef4216ce9462c4..7ba15199b7f7c8 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -221,8 +221,8 @@ def jabs_op(name, op): _specialized_instructions = [ "BINARY_SUBSCR_ADAPTIVE", - "BINARY_SUBSCR_LIST", - "BINARY_SUBSCR_TUPLE", + "BINARY_SUBSCR_LIST_INT", + "BINARY_SUBSCR_TUPLE_INT", "BINARY_SUBSCR_DICT", "JUMP_ABSOLUTE_QUICK", "LOAD_ATTR_ADAPTIVE", diff --git a/Python/ceval.c b/Python/ceval.c index a01dbf04580c41..20594f80ac66d9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1944,7 +1944,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } } - case TARGET(BINARY_SUBSCR_LIST): { + case TARGET(BINARY_SUBSCR_LIST_INT): { PyObject *sub = TOP(); PyObject *list = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); @@ -1962,7 +1962,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DISPATCH(); } - case TARGET(BINARY_SUBSCR_TUPLE): { + case TARGET(BINARY_SUBSCR_TUPLE_INT): { PyObject *sub = TOP(); PyObject *tuple = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); @@ -1987,7 +1987,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *sub = TOP(); PyObject *res = 
PyDict_GetItemWithError(dict, sub); if (res == NULL) { - goto binary_subscr_error; + goto binary_subscr_dict_error; } Py_INCREF(res); STACK_SHRINK(1); @@ -4411,13 +4411,14 @@ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(BINARY_SUBSCR) -binary_subscr_error: +binary_subscr_dict_error: { PyObject *sub = POP(); if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } Py_DECREF(sub); + goto error; } error: diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f1464f30ace280..d88c766c07ab46 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -7,12 +7,12 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, &&TARGET_BINARY_SUBSCR_ADAPTIVE, - &&TARGET_BINARY_SUBSCR_LIST, + &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, - &&TARGET_BINARY_SUBSCR_TUPLE, + &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, diff --git a/Python/specialize.c b/Python/specialize.c index b86f955b095d0e..05de8e22c42287 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -649,7 +649,7 @@ _Py_Specialize_BinarySubscr( _PyAdaptiveEntry *cache0 = &cache->adaptive; if (PyList_CheckExact(container)) { if (PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, _Py_OPARG(*instr)); goto success; } else { SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "list; non-integer subscr"); @@ -657,7 +657,7 @@ _Py_Specialize_BinarySubscr( } if (PyTuple_CheckExact(container)) { if (PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, _Py_OPARG(*instr)); goto success; } else { SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "tuple; non-integer subscr"); From 
ccaea796bd6fa58612235e9525766aef8c424990 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 13 Jul 2021 11:43:20 +0100 Subject: [PATCH 11/23] remove redundant null check --- Python/ceval.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 20594f80ac66d9..9cba6f3e80e41d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1994,13 +1994,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) Py_DECREF(sub); SET_TOP(res); Py_DECREF(dict); - if (res == NULL) { - if (!_PyErr_Occurred(tstate)) { - _PyErr_SetKeyError(sub); - } - Py_DECREF(sub); - goto error; - } DISPATCH(); } From b40bfb721d0804bbbd57b84f196d80a0d60de248 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 13 Jul 2021 11:58:57 +0100 Subject: [PATCH 12/23] check container type more efficiently --- Python/specialize.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 05de8e22c42287..8479a539ed0d7d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -647,7 +647,9 @@ _Py_Specialize_BinarySubscr( _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { _PyAdaptiveEntry *cache0 = &cache->adaptive; - if (PyList_CheckExact(container)) { + + PyTypeObject *container_type = Py_TYPE(container); + if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, _Py_OPARG(*instr)); goto success; @@ -655,7 +657,7 @@ _Py_Specialize_BinarySubscr( SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "list; non-integer subscr"); } } - if (PyTuple_CheckExact(container)) { + if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, _Py_OPARG(*instr)); goto success; @@ -663,7 +665,7 @@ _Py_Specialize_BinarySubscr( SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "tuple; non-integer subscr"); } } - if (PyDict_CheckExact(container)) { + if 
(container_type == &PyDict_Type) { *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr)); goto success; } From 61fa932f7ec7fc6b5df75ec10e91ba4ebecdff7c Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 13 Jul 2021 19:16:57 +0100 Subject: [PATCH 13/23] use oparg as the adaptive cache counter for BINARY_SUBSCR --- Include/internal/pycore_code.h | 20 ++++++++-------- Python/ceval.c | 42 ++++++++++++++++++++++------------ Python/specialize.c | 25 ++++++++++---------- 3 files changed, 50 insertions(+), 37 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index f0da7cc5adf98c..a73383edec62b2 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -296,32 +296,32 @@ saturating_start(void) } static inline void -record_cache_hit(_PyAdaptiveEntry *entry) { - entry->counter = saturating_increment(entry->counter); +record_cache_hit(uint8_t *counter) { + *counter = saturating_increment(*counter); } static inline void -record_cache_miss(_PyAdaptiveEntry *entry) { - entry->counter = saturating_decrement(entry->counter); +record_cache_miss(uint8_t *counter) { + *counter = saturating_decrement(*counter); } static inline int -too_many_cache_misses(_PyAdaptiveEntry *entry) { - return entry->counter == saturating_zero(); +too_many_cache_misses(uint8_t counter) { + return counter == saturating_zero(); } #define BACKOFF 64 -static inline void -cache_backoff(_PyAdaptiveEntry *entry) { - entry->counter = BACKOFF; +static inline uint8_t +cache_backoff() { + return BACKOFF; } /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +int 
_Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); #define SPECIALIZATION_STATS 0 #define SPECIALIZATION_STATS_DETAILED 0 diff --git a/Python/ceval.c b/Python/ceval.c index 9cba6f3e80e41d..a490845e849e6d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1927,19 +1927,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) } case TARGET(BINARY_SUBSCR_ADAPTIVE): { - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + if (oparg == 0) { PyObject *sub = TOP(); PyObject *container = SECOND(); next_instr--; - if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) { + if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) { goto error; } DISPATCH(); } else { STAT_INC(BINARY_SUBSCR, deferred); - cache->adaptive.counter--; + // oparg is the adaptive cache counter + *(next_instr-1) = _Py_MAKECODEUNIT(opcode, oparg-1); JUMP_TO_INSTRUCTION(BINARY_SUBSCR); } } @@ -2955,7 +2955,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; PyObject *res = ep->me_value; DEOPT_IF(res == NULL, LOAD_GLOBAL); - record_cache_hit(cache0); + record_cache_hit(&cache0->counter); STAT_INC(LOAD_GLOBAL, hit); Py_INCREF(res); PUSH(res); @@ -2976,7 +2976,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index; PyObject *res = ep->me_value; DEOPT_IF(res == NULL, LOAD_GLOBAL); - record_cache_hit(cache0); + record_cache_hit(&cache0->counter); STAT_INC(LOAD_GLOBAL, hit); Py_INCREF(res); PUSH(res); @@ -3399,7 +3399,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = dict->ma_values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); + record_cache_hit(&cache0->counter); Py_INCREF(res); SET_TOP(res); 
Py_DECREF(owner); @@ -3423,7 +3423,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); + record_cache_hit(&cache0->counter); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3452,7 +3452,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); + record_cache_hit(&cache0->counter); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3473,7 +3473,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); + record_cache_hit(&cache0->counter); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -4390,19 +4390,33 @@ opname ## _miss: \ { \ STAT_INC(opname, miss); \ _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \ - record_cache_miss(cache); \ - if (too_many_cache_misses(cache)) { \ + record_cache_miss(&cache->counter); \ + if (too_many_cache_misses(cache->counter)) { \ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \ STAT_INC(opname, deopt); \ - cache_backoff(cache); \ + cache->counter = cache_backoff(); \ } \ oparg = cache->original_oparg; \ JUMP_TO_INSTRUCTION(opname); \ } +#define MISS_WITH_OPARG_COUNTER(opname) \ +opname ## _miss: \ + { \ + STAT_INC(opname, miss); \ + uint8_t oparg = _Py_OPARG(next_instr[-1]); \ + record_cache_miss(&oparg); \ + if (too_many_cache_misses(oparg)) { \ + oparg = cache_backoff(); \ + next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \ + STAT_INC(opname, deopt); \ + } \ + JUMP_TO_INSTRUCTION(opname); \ + } + MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) -MISS_WITH_CACHE(BINARY_SUBSCR) +MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) binary_subscr_dict_error: { diff --git 
a/Python/specialize.c b/Python/specialize.c index 8479a539ed0d7d..d49dd96dada0d0 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -170,7 +170,7 @@ static uint8_t adaptive_opcodes[256] = { static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyLoadAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ - [BINARY_SUBSCR] = 1, /* _PyAdaptiveEntry */ + [BINARY_SUBSCR] = 0, }; /* Return the oparg for the cache_offset and instruction index. @@ -254,7 +254,6 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) previous_opcode = opcode; continue; } - instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, new_oparg); previous_opcode = adaptive_opcode; int entries_needed = cache_requirements[opcode]; if (entries_needed) { @@ -264,7 +263,11 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) _GetSpecializedCacheEntry(instructions, cache0_offset); cache->adaptive.original_oparg = oparg; cache->adaptive.counter = 0; + } else { + // oparg is the adaptive cache counter + new_oparg = 0; } + instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, new_oparg); } else { /* Super instructions don't use the cache, @@ -565,7 +568,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp fail: STAT_INC(LOAD_ATTR, specialization_failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache0->counter = cache_backoff(); return 0; success: STAT_INC(LOAD_ATTR, specialization_success); @@ -632,7 +635,7 @@ _Py_Specialize_LoadGlobal( fail: STAT_INC(LOAD_GLOBAL, specialization_failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache0->counter = cache_backoff(); return 0; success: STAT_INC(LOAD_GLOBAL, specialization_success); @@ -643,15 +646,12 @@ _Py_Specialize_LoadGlobal( int _Py_Specialize_BinarySubscr( - PyObject *container, PyObject *sub, - _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) + PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) { - 
_PyAdaptiveEntry *cache0 = &cache->adaptive; - PyTypeObject *container_type = Py_TYPE(container); if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, saturating_start()); goto success; } else { SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "list; non-integer subscr"); @@ -659,14 +659,14 @@ _Py_Specialize_BinarySubscr( } if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, saturating_start()); goto success; } else { SPECIALIZATION_FAIL(BINARY_SUBSCR, Py_TYPE(container), sub, "tuple; non-integer subscr"); } } if (container_type == &PyDict_Type) { - *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, saturating_start()); goto success; } @@ -675,12 +675,11 @@ _Py_Specialize_BinarySubscr( fail: STAT_INC(BINARY_SUBSCR, specialization_failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), cache_backoff()); return 0; success: STAT_INC(BINARY_SUBSCR, specialization_success); assert(!PyErr_Occurred()); - cache0->counter = saturating_start(); return 0; } From 289f7be201dcf754cacac8f858de5a7378fc58e0 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 10:45:30 +0100 Subject: [PATCH 14/23] avoid PyLong_AsSsize_t for index bounds check --- Python/ceval.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index a490845e849e6d..9feb4c97b6ba89 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1949,8 +1949,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *list = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); 
DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); - Py_ssize_t index = PyLong_AsSsize_t(sub); - DEOPT_IF(index < 0 || index >= PyList_Size(list), BINARY_SUBSCR); + + // Deopt unless 0 <= sub < PyList_Size(list) + DEOPT_IF(Py_SIZE(sub) < 0, BINARY_SUBSCR); + assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); + Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; + DEOPT_IF(index >= PyList_Size(list), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); PyObject *res = PyList_GET_ITEM(list, index); assert(res != NULL); @@ -1967,8 +1972,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *tuple = SECOND(); DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); - Py_ssize_t index = PyLong_AsSsize_t(sub); - DEOPT_IF(index < 0 || index >= PyTuple_Size(tuple), BINARY_SUBSCR); + + // Deopt unless 0 <= sub < PyTuple_Size(list) + DEOPT_IF(Py_SIZE(sub) < 0, BINARY_SUBSCR); + assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); + Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; + DEOPT_IF(index >= PyTuple_Size(tuple), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); PyObject *res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); From 2f4d35984b100805b3653c6b87b4e96859fdc41a Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 11:26:34 +0100 Subject: [PATCH 15/23] update Python/importlib_external.h --- Python/importlib_external.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/importlib_external.h b/Python/importlib_external.h index 020d12dd348f12..e97e04ee17f82c 100644 --- a/Python/importlib_external.h +++ b/Python/importlib_external.h @@ -444,7 +444,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = { 62,0,176,3,47,11,179,9,62,0,190,7,65,22,7,193, 6,5,65,12,6,193,11,1,65,22,7,193,12,7,65,21, 13,193,19,3,65,22,7,193,23,1,65,21,13,193,24,1, - 65,22,7,105,132,13,0,0,114,47,0,0,0,114,35,0, + 
65,22,7,105,131,13,0,0,114,47,0,0,0,114,35,0, 0,0,115,2,0,0,0,13,10,90,11,95,95,112,121,99, 97,99,104,101,95,95,122,4,111,112,116,45,122,3,46,112, 121,122,4,46,112,121,119,122,4,46,112,121,99,41,1,218, From 1a2c302dcf9923ec92ff4d9bc9708fd5d5ca4aa2 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 10:31:11 +0000 Subject: [PATCH 16/23] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2021-07-14-10-31-10.bpo-26280.cgpM4B.rst | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-07-14-10-31-10.bpo-26280.cgpM4B.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-07-14-10-31-10.bpo-26280.cgpM4B.rst b/Misc/NEWS.d/next/Core and Builtins/2021-07-14-10-31-10.bpo-26280.cgpM4B.rst new file mode 100644 index 00000000000000..cb561e79c78cee --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-07-14-10-31-10.bpo-26280.cgpM4B.rst @@ -0,0 +1,9 @@ +Implement adaptive specialization for BINARY_SUBSCR + + Three specialized forms of BINARY_SUBSCR are added: + + * BINARY_SUBSCR_LIST_INT + + * BINARY_SUBSCR_TUPLE_INT + + * BINARY_SUBSCR_DICT \ No newline at end of file From 5b2e3f98aa322ac3d6fd05069a9a7f1cf40963f1 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 15:28:49 +0100 Subject: [PATCH 17/23] revert change to adaptive cache functions api --- Include/internal/pycore_code.h | 20 ++++++++++---------- Python/ceval.c | 24 ++++++++++++------------ Python/specialize.c | 6 +++--- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index a73383edec62b2..06a8b1b1e7df32 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -296,25 +296,25 @@ saturating_start(void) } static inline void -record_cache_hit(uint8_t
*counter) { - *counter = saturating_increment(*counter); +record_cache_hit(_PyAdaptiveEntry *entry) { + entry->counter = saturating_increment(entry->counter); } static inline void -record_cache_miss(uint8_t *counter) { - *counter = saturating_decrement(*counter); +record_cache_miss(_PyAdaptiveEntry *entry) { + entry->counter = saturating_decrement(entry->counter); } static inline int -too_many_cache_misses(uint8_t counter) { - return counter == saturating_zero(); +too_many_cache_misses(_PyAdaptiveEntry *entry) { + return entry->counter == saturating_zero(); } -#define BACKOFF 64 +#define ADAPTIVE_CACHE_BACKOFF 64 -static inline uint8_t -cache_backoff() { - return BACKOFF; +static inline void +cache_backoff(_PyAdaptiveEntry *entry) { + entry->counter = ADAPTIVE_CACHE_BACKOFF; } /* Specialization functions */ diff --git a/Python/ceval.c b/Python/ceval.c index 9feb4c97b6ba89..c458525638880f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2965,7 +2965,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; PyObject *res = ep->me_value; DEOPT_IF(res == NULL, LOAD_GLOBAL); - record_cache_hit(&cache0->counter); + record_cache_hit(cache0); STAT_INC(LOAD_GLOBAL, hit); Py_INCREF(res); PUSH(res); @@ -2986,7 +2986,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache0->index; PyObject *res = ep->me_value; DEOPT_IF(res == NULL, LOAD_GLOBAL); - record_cache_hit(&cache0->counter); + record_cache_hit(cache0); STAT_INC(LOAD_GLOBAL, hit); Py_INCREF(res); PUSH(res); @@ -3409,7 +3409,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = dict->ma_values[cache0->index]; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(&cache0->counter); + record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3433,7 +3433,7 @@ 
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(&cache0->counter); + record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3462,7 +3462,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = ep->me_value; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(&cache0->counter); + record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -3483,7 +3483,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) res = *(PyObject **)addr; DEOPT_IF(res == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - record_cache_hit(&cache0->counter); + record_cache_hit(cache0); Py_INCREF(res); SET_TOP(res); Py_DECREF(owner); @@ -4400,11 +4400,11 @@ opname ## _miss: \ { \ STAT_INC(opname, miss); \ _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \ - record_cache_miss(&cache->counter); \ - if (too_many_cache_misses(cache->counter)) { \ + record_cache_miss(cache); \ + if (too_many_cache_misses(cache)) { \ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, _Py_OPARG(next_instr[-1])); \ STAT_INC(opname, deopt); \ - cache->counter = cache_backoff(); \ + cache_backoff(cache); \ } \ oparg = cache->original_oparg; \ JUMP_TO_INSTRUCTION(opname); \ @@ -4415,9 +4415,9 @@ opname ## _miss: \ { \ STAT_INC(opname, miss); \ uint8_t oparg = _Py_OPARG(next_instr[-1]); \ - record_cache_miss(&oparg); \ - if (too_many_cache_misses(oparg)) { \ - oparg = cache_backoff(); \ + oparg = saturating_decrement(oparg); \ + if (oparg == saturating_zero()) /* too many cache misses */ { \ + oparg = ADAPTIVE_CACHE_BACKOFF; \ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \ STAT_INC(opname, deopt); \ } \ diff --git a/Python/specialize.c b/Python/specialize.c index d49dd96dada0d0..5ebe596418b032 100644 --- a/Python/specialize.c +++ 
b/Python/specialize.c @@ -568,7 +568,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp fail: STAT_INC(LOAD_ATTR, specialization_failure); assert(!PyErr_Occurred()); - cache0->counter = cache_backoff(); + cache_backoff(cache0); return 0; success: STAT_INC(LOAD_ATTR, specialization_success); @@ -635,7 +635,7 @@ _Py_Specialize_LoadGlobal( fail: STAT_INC(LOAD_GLOBAL, specialization_failure); assert(!PyErr_Occurred()); - cache0->counter = cache_backoff(); + cache_backoff(cache0); return 0; success: STAT_INC(LOAD_GLOBAL, specialization_success); @@ -675,7 +675,7 @@ _Py_Specialize_BinarySubscr( fail: STAT_INC(BINARY_SUBSCR, specialization_failure); assert(!PyErr_Occurred()); - *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), cache_backoff()); + *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); return 0; success: STAT_INC(BINARY_SUBSCR, specialization_success); From 4f909d646184a405f996ee14790bc76b3fc0ef3c Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 15:34:10 +0100 Subject: [PATCH 18/23] add missing include --- Python/ceval.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ceval.c b/Python/ceval.c index c458525638880f..82fb9b7a4e43be 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -15,6 +15,7 @@ #include "pycore_ceval.h" // _PyEval_SignalAsyncExc() #include "pycore_code.h" #include "pycore_initconfig.h" // _PyStatus_OK() +#include "pycore_long.h" // _PyLong_GetZero() #include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_moduleobject.h" #include "pycore_pyerrors.h" // _PyErr_Fetch() From a863a3dc1794bfc296ddd4d1c58ccf28403a22fc Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 16:14:52 +0100 Subject: [PATCH 19/23] added UPDATE_PREV_INSTR_OPARG macro and use it to control the oparg counter --- Python/ceval.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 82fb9b7a4e43be..08e0752eb9dd9d 
100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1399,6 +1399,8 @@ eval_frame_handle_pending(PyThreadState *tstate) #define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; } +#define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg) + #define GLOBALS() specials[FRAME_SPECIALS_GLOBALS_OFFSET] #define BUILTINS() specials[FRAME_SPECIALS_BUILTINS_OFFSET] #define LOCALS() specials[FRAME_SPECIALS_LOCALS_OFFSET] @@ -1940,7 +1942,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) else { STAT_INC(BINARY_SUBSCR, deferred); // oparg is the adaptive cache counter - *(next_instr-1) = _Py_MAKECODEUNIT(opcode, oparg-1); + UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1); + assert(_Py_OPCODE(next_instr[-1]) == BINARY_SUBSCR_ADAPTIVE); + assert(_Py_OPARG(next_instr[-1]) == oparg - 1); JUMP_TO_INSTRUCTION(BINARY_SUBSCR); } } @@ -4415,8 +4419,9 @@ opname ## _miss: \ opname ## _miss: \ { \ STAT_INC(opname, miss); \ - uint8_t oparg = _Py_OPARG(next_instr[-1]); \ - oparg = saturating_decrement(oparg); \ + uint8_t oparg = saturating_decrement(_Py_OPARG(next_instr[-1])); \ + UPDATE_PREV_INSTR_OPARG(next_instr, oparg); \ + assert(_Py_OPARG(next_instr[-1]) == oparg); \ if (oparg == saturating_zero()) /* too many cache misses */ { \ oparg = ADAPTIVE_CACHE_BACKOFF; \ next_instr[-1] = _Py_MAKECODEUNIT(opname ## _ADAPTIVE, oparg); \ From c30a188a503120fecd7590ba333dff18001863b1 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 16:23:03 +0100 Subject: [PATCH 20/23] use the full weird sign check --- Python/ceval.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 08e0752eb9dd9d..05ac09fb1f5939 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1956,7 +1956,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); // Deopt unless 0 <= sub < PyList_Size(list) - 
DEOPT_IF(Py_SIZE(sub) < 0, BINARY_SUBSCR); + Py_ssize_t signed_magnitude = Py_SIZE(sub); + DEOPT_IF(((size_t)signed_magnitude) > 1, BINARY_SUBSCR); assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyList_Size(list), BINARY_SUBSCR); @@ -1979,7 +1980,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); // Deopt unless 0 <= sub < PyTuple_Size(list) - DEOPT_IF(Py_SIZE(sub) < 0, BINARY_SUBSCR); + Py_ssize_t signed_magnitude = Py_SIZE(sub); + DEOPT_IF(((size_t)signed_magnitude) > 1, BINARY_SUBSCR); assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; DEOPT_IF(index >= PyTuple_Size(tuple), BINARY_SUBSCR); From d5b0f32e851ce4596650fe199ec86b524928bf8b Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 22:05:39 +0100 Subject: [PATCH 21/23] revert unintended change to magic number --- Lib/importlib/_bootstrap_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 9e7e77bf5f9ad4..d624b73f38eb66 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -372,7 +372,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. 
-MAGIC_NUMBER = (3459).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3460).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' From a9c9851f356f22f2b2215d2070e2235559af7bd9 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 14 Jul 2021 22:52:26 +0100 Subject: [PATCH 22/23] update Python/importlib_external.h --- Python/importlib_external.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/importlib_external.h b/Python/importlib_external.h index e97e04ee17f82c..020d12dd348f12 100644 --- a/Python/importlib_external.h +++ b/Python/importlib_external.h @@ -444,7 +444,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = { 62,0,176,3,47,11,179,9,62,0,190,7,65,22,7,193, 6,5,65,12,6,193,11,1,65,22,7,193,12,7,65,21, 13,193,19,3,65,22,7,193,23,1,65,21,13,193,24,1, - 65,22,7,105,131,13,0,0,114,47,0,0,0,114,35,0, + 65,22,7,105,132,13,0,0,114,47,0,0,0,114,35,0, 0,0,115,2,0,0,0,13,10,90,11,95,95,112,121,99, 97,99,104,101,95,95,122,4,111,112,116,45,122,3,46,112, 121,122,4,46,112,121,119,122,4,46,112,121,99,41,1,218, From 19cdf02d735c0a673beac8e33296cd2657894189 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 15 Jul 2021 12:05:47 +0100 Subject: [PATCH 23/23] _Size --> _GET_SIZE --- Python/ceval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 05ac09fb1f5939..1467c12fd5e4df 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1960,7 +1960,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(((size_t)signed_magnitude) > 1, BINARY_SUBSCR); assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; - DEOPT_IF(index >= PyList_Size(list), BINARY_SUBSCR); + DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res = PyList_GET_ITEM(list, index); @@ -1984,7 
+1984,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) DEOPT_IF(((size_t)signed_magnitude) > 1, BINARY_SUBSCR); assert(((PyLongObject *)_PyLong_GetZero())->ob_digit[0] == 0); Py_ssize_t index = ((PyLongObject*)sub)->ob_digit[0]; - DEOPT_IF(index >= PyTuple_Size(tuple), BINARY_SUBSCR); + DEOPT_IF(index >= PyTuple_GET_SIZE(tuple), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res = PyTuple_GET_ITEM(tuple, index);