From dd606ae1965e320a80e95dea46ca4fbb8b18284b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 28 Sep 2023 16:27:11 +0100 Subject: [PATCH 1/9] Add valid flag to executor objects --- Include/cpython/optimizer.h | 1 + Python/optimizer.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 47536108a9665e..c1ff7ae1afbdb9 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -16,6 +16,7 @@ typedef struct _PyExecutorObject { /* WARNING: execute consumes a reference to self. This is necessary to allow executors to tail call into each other. */ struct _PyInterpreterFrame *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer); _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + bool valid; /* This should be set to true by the optimizer */ /* Data needed by the executor goes here, but is opaque to the VM */ } _PyExecutorObject; diff --git a/Python/optimizer.c b/Python/optimizer.c index fbdbf7291784c4..3d2d91774ce036 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -261,6 +261,7 @@ counter_optimize( executor->optimizer = (_PyCounterOptimizerObject *)self; executor->next_instr = instr; *exec_ptr = (_PyExecutorObject *)executor; + executor->executor.valid = true; return 1; } @@ -904,6 +905,7 @@ uop_optimize( executor->base.execute = _PyUopExecute; memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); *exec_ptr = (_PyExecutorObject *)executor; + executor->base.valid = true; return 1; } From 45c30d369ea9c96e33aa3d288a34782f662094f0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 1 Oct 2023 17:40:14 +0100 Subject: [PATCH 2/9] Add ability to invalidate executors as a result of changes to other objects. --- Include/cpython/code.h | 3 +- Include/cpython/optimizer.h | 20 ++- Include/internal/pycore_interp.h | 1 + Include/internal/pycore_opcode_metadata.h | 7 + Lib/test/test_capi/test_misc.py | 44 +++++ Modules/_testinternalcapi.c | 28 ++++ Objects/codeobject.c | 2 + Python/abstract_interp_cases.c.h | 4 + Python/bytecodes.c | 6 + Python/executor_cases.c.h | 7 + Python/instrumentation.c | 1 + Python/optimizer.c | 191 +++++++++++++++++++++- Python/pystate.c | 1 + 13 files changed, 311 insertions(+), 4 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 45b09a1265df80..25d12b33a65c1a 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -280,7 +280,8 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i #define PY_FOREACH_CODE_EVENT(V) \ V(CREATE) \ - V(DESTROY) + V(DESTROY) \ + V(INSTRUMENT) typedef enum { #define PY_DEF_EVENT(op) PY_CODE_EVENT_##op, diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index c1ff7ae1afbdb9..f5b0a4169cad17 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -6,9 +6,23 @@ extern "C" { #endif +typedef struct _PyExecutorLinkListNode { + struct _PyExecutorObject *next; + struct _PyExecutorObject *previous; +} _PyExecutorLinkListNode; + +/* Bloom filter with k = 6, m = 256 */ +typedef struct _bloom_filter { + uint32_t bits[8]; +} _PyBloomFilter; + typedef struct { uint8_t opcode; uint8_t oparg; + uint8_t valid; + uint8_t linked; + _PyBloomFilter bloom; + _PyExecutorLinkListNode links; } _PyVMData; typedef struct _PyExecutorObject { @@ -16,7 +30,6 @@ typedef struct _PyExecutorObject { /* WARNING: execute consumes a reference to self. 
This is necessary to allow executors to tail call into each other. */ struct _PyInterpreterFrame *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer); _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - bool valid; /* This should be set to true by the optimizer */ /* Data needed by the executor goes here, but is opaque to the VM */ } _PyExecutorObject; @@ -46,6 +59,11 @@ _PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_ extern _PyOptimizerObject _PyOptimizer_Default; +void _Py_ExecutorInit(_PyExecutorObject *); +void _Py_ExecutorClear(_PyExecutorObject *); +PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj); + /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0912bd175fe4f7..2a8a8d753e6235 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -186,6 +186,7 @@ struct _is { struct types_state types; struct callable_cache callable_cache; _PyOptimizerObject *optimizer; + _PyExecutorObject *executor_list_head; uint16_t optimizer_resume_threshold; uint16_t optimizer_backedge_threshold; uint32_t next_func_version; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 16c1637e496033..cce460c60ab861 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -80,6 +80,7 @@ #define _JUMP_TO_TOP 352 #define _SAVE_CURRENT_IP 353 #define _INSERT 354 +#define _JUMP_IF_INVALID 355 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -633,6 +634,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case _INSERT: return oparg + 1; + case _JUMP_IF_INVALID: + return 0; default: return -1; } @@ -1191,6 +1194,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case _INSERT: return oparg + 1; + case _JUMP_IF_INVALID: + return 0; default: return -1; } @@ -1538,6 +1543,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_SAVE_CURRENT_IP] = { true, INSTR_FMT_IX, 0 }, [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [_JUMP_IF_INVALID] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, }; #endif // NEED_OPCODE_METADATA @@ -1745,6 +1751,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_SAVE_CURRENT_IP] = "_SAVE_CURRENT_IP", [_INSERT] = "_INSERT", + [_JUMP_IF_INVALID] = "_JUMP_IF_INVALID", }; #endif // NEED_OPCODE_METADATA diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5ece213e7b2363..cdb251e7a64c10 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2387,6 +2387,50 @@ def testfunc(x): self.assertEqual(code, replace_code) self.assertEqual(hash(code), hash(replace_code)) +class TestExecutorInvalidation(unittest.TestCase): + + def setUp(self): + self.old = _testinternalcapi.get_optimizer() + self.opt = _testinternalcapi.get_counter_optimizer() + _testinternalcapi.set_optimizer(self.opt) + + def tearDown(self): + _testinternalcapi.set_optimizer(self.old) + + def test_invalidate_object(self): + # Generate a new 
set of functions at each call + ns = {} + func_src = "\n".join( + f""" + def f{n}(): + for _ in range(1000): + pass + """ for n in range(5) + ) + exec(textwrap.dedent(func_src), ns, ns) + funcs = [ ns[f'f{n}'] for n in range(5)] + objects = [object() for _ in range(5)] + + for f in funcs: + f() + executors = [ get_first_executor(f) for f in funcs] + # Set things up so each executor depends on the objects + # with an equal or lower index. + for i, exe in enumerate(executors): + self.assertTrue(exe.valid) + for obj in objects[:i+1]: + _testinternalcapi.add_executor_dependency(exe, obj) + self.assertTrue(exe.valid) + # Assert that the correct executors are invalidated + # and check that nothing crashes when we invalidate + # an executor mutliple times. + for i in (4,3,2,1,0): + _testinternalcapi.invalidate_executors(objects[i]) + for exe in executors[i:]: + self.assertFalse(exe.valid) + for exe in executors[:i]: + self.assertTrue(exe.valid) + def get_first_executor(func): code = func.__code__ diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c6b80fffdec16d..99c7533ce2008d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -998,6 +998,32 @@ get_executor(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return (PyObject *)PyUnstable_GetExecutor((PyCodeObject *)code, ioffset); } +static PyObject * +add_executor_dependency(PyObject *self, PyObject *args) +{ + PyObject *exec; + PyObject *obj; + if (!PyArg_ParseTuple(args, "OO", &exec, &obj)) { + return NULL; + } + /* No way to tell in general if exec is an executor, so we only accept + * counting_executor */ + if (strcmp(Py_TYPE(exec)->tp_name, "counting_executor")) { + PyErr_SetString(PyExc_TypeError, "argument must be a counting_executor"); + return NULL; + } + _Py_Executor_DependsOn((_PyExecutorObject *)exec, obj); + Py_RETURN_NONE; +} + +static PyObject * +invalidate_executors(PyObject *self, PyObject *obj) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + _Py_Executors_InvalidateDependency(interp, obj); + Py_RETURN_NONE; +} + static int _pending_callback(void *arg) { /* we assume the argument is callable object to which we own a reference */ @@ -1561,6 +1587,8 @@ static PyMethodDef module_functions[] = { {"get_executor", _PyCFunction_CAST(get_executor), METH_FASTCALL, NULL}, {"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL}, {"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL}, + {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, + {"invalidate_executors", invalidate_executors, METH_O, NULL}, {"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc), METH_VARARGS | METH_KEYWORDS}, {"pending_identify", pending_identify, METH_VARARGS, NULL}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index f662b8e354bb1e..9e5e84b2d8d2f0 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -63,6 +63,8 @@ notify_code_watchers(PyCodeEvent event, PyCodeObject *co) } } + + int PyCode_AddWatcher(PyCode_WatchCallback callback) { diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 61b1db9e5a1543..26a4c2c939d2cc 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -880,3 +880,7 @@ PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true); break; } + + case _JUMP_IF_INVALID: { + break; + } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0f89779fb9245f..44cc3acbafef65 100644 --- a/Python/bytecodes.c +++ 
b/Python/bytecodes.c @@ -3957,6 +3957,12 @@ dummy_func( memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); } + op(_JUMP_IF_INVALID, (--)) { + if (!self->base.vm_data.valid) { + pc = oparg; + } + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 981db6973f281a..3d0d0eb2827dcc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3159,3 +3159,10 @@ stack_pointer[-1 - oparg] = top; break; } + + case _JUMP_IF_INVALID: { + if (!self->base.vm_data.valid) { + pc = oparg; + } + break; + } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 0b974f6133ce7d..3fa544ab44410a 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1564,6 +1564,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) if (code->co_executors != NULL) { _PyCode_Clear_Executors(code); } + _Py_Executors_InvalidateDependency(interp, code); int code_len = (int)Py_SIZE(code); /* code->_co_firsttraceable >= code_len indicates * that no instrumentation can be inserted. diff --git a/Python/optimizer.c b/Python/optimizer.c index 3d2d91774ce036..6eecc9f99e0d54 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -220,10 +220,16 @@ typedef struct { static void counter_dealloc(_PyCounterExecutorObject *self) { + _Py_ExecutorClear((_PyExecutorObject *)self); Py_DECREF(self->optimizer); PyObject_Free(self); } +static PyMemberDef counter_members[] = { + { "valid", Py_T_UBYTE, offsetof(_PyCounterExecutorObject, executor.vm_data.valid), Py_READONLY, "is valid?" }, + { NULL } +}; + static PyTypeObject CounterExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "counting_executor", @@ -231,6 +237,7 @@ static PyTypeObject CounterExecutor_Type = { .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)counter_dealloc, + .tp_members = counter_members, }; static _PyInterpreterFrame * @@ -261,7 +268,7 @@ counter_optimize( executor->optimizer = (_PyCounterOptimizerObject *)self; executor->next_instr = instr; *exec_ptr = (_PyExecutorObject *)executor; - executor->executor.valid = true; + _Py_ExecutorInit((_PyExecutorObject *)executor); return 1; } @@ -289,6 +296,8 @@ static PyTypeObject CounterOptimizer_Type = { PyObject * PyUnstable_Optimizer_NewCounter(void) { + PyType_Ready(&CounterExecutor_Type); + PyType_Ready(&CounterOptimizer_Type); _PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&CounterOptimizer_Type); if (opt == NULL) { return NULL; @@ -304,6 +313,7 @@ PyUnstable_Optimizer_NewCounter(void) static void uop_dealloc(_PyUOpExecutorObject *self) { + _Py_ExecutorClear((_PyExecutorObject *)self); PyObject_Free(self); } @@ -904,8 +914,8 @@ uop_optimize( } executor->base.execute = _PyUopExecute; memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); + _Py_ExecutorInit((_PyExecutorObject *)executor); *exec_ptr = (_PyExecutorObject *)executor; - executor->base.valid = true; return 1; } @@ -937,3 +947,180 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER; return (PyObject *)opt; } + + +/***************************************** + * Executor management + ****************************************/ + +static uint16_t +address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { + assert(ptr != NULL); + uint16_t uhash = seed; + uintptr_t addr = (uintptr_t)ptr; + for (int i = 0; i < SIZEOF_VOID_P; i++) { + uhash ^= addr 
& 255; + uhash *= multiplier; + addr >>= 8; + } + return uhash; +} + +static const uint32_t multipliers[3] = { + _PyHASH_MULTIPLIER, + 110351524, + 48271 +}; + +static const uint32_t seeds[3] = { + 20221211, + 2147483647, + 1051, +}; + +static void +address_to_hashes(void *ptr, _PyBloomFilter *bloom) +{ + for (int i = 0; i < 8; i++) { + bloom->bits[i] = 0; + } + /* Set k (6) bits */ + for (int i = 0; i < 3; i++) { + uint16_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); + int bit0 = hash & 255; + int bit1 = hash >> 8; + bloom->bits[bit0 >> 5] |= (1 << (bit0&31)); + bloom->bits[bit1 >> 5] |= (1 << (bit1&31)); + } +} + +static void +add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) +{ + _PyBloomFilter hashes; + address_to_hashes(obj, &hashes); + for (int i = 0; i < 8; i++) { + bloom->bits[i] |= hashes.bits[i]; + } +} + +static bool +bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes) +{ + for (int i = 0; i < 8; i++) { + if ((bloom->bits[i] & hashes->bits[i]) != hashes->bits[i]) { + return false; + } + } + return true; +} + +static void +link_executor(_PyExecutorObject *executor) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyExecutorLinkListNode *links = &executor->vm_data.links; + _PyExecutorObject *head = interp->executor_list_head; + if (head == NULL) { + interp->executor_list_head = executor; + links->previous = NULL; + links->next = NULL; + } + else { + _PyExecutorObject *next = head->vm_data.links.next; + links->previous = head; + links->next = next; + if (next != NULL) { + next->vm_data.links.previous = executor; + } + head->vm_data.links.next = executor; + } + executor->vm_data.linked = true; +} + +static void +unlink_executor(_PyExecutorObject *executor) +{ + if (!executor->vm_data.linked) { + return; + } + _PyExecutorLinkListNode *links = &executor->vm_data.links; + _PyExecutorObject *next = links->next; + _PyExecutorObject *prev = links->previous; + if (next != NULL) { + next->vm_data.links.previous = prev; + } + else if (prev == NULL) { + // Both are NULL, so this must be the list head. 
+ PyInterpreterState *interp = PyInterpreterState_Get(); + assert(interp->executor_list_head == executor); + interp->executor_list_head = executor->vm_data.links.next; + } + if (prev != NULL) { + prev->vm_data.links.next = next; + } + executor->vm_data.linked = false; +} + +/* This must be called by optimizers before using the executor */ +void +_Py_ExecutorInit(_PyExecutorObject *executor) +{ + executor->vm_data.valid = true; + for (int i = 0; i < 8; i++) { + executor->vm_data.bloom.bits[i] = 0; + } + link_executor(executor); +} + +/* This must be called by executors during dealloc */ +void +_Py_ExecutorClear(_PyExecutorObject *executor) +{ + unlink_executor(executor); +} + +void +_Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) +{ + assert(executor->vm_data.valid = true); + add_to_bloom_filter(&executor->vm_data.bloom, obj); +} + +/* Invalidate all executors that depend on `obj` + * May cause other executors to be invalidated as well + */ +void +_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) +{ + _PyBloomFilter hashes; + address_to_hashes(obj, &hashes); + /* Walk the list of executors */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + assert(exec->vm_data.valid); + _PyExecutorObject *next = exec->vm_data.links.next; + if (bloom_filter_may_contain(&exec->vm_data.bloom, &hashes)) { + exec->vm_data.valid = false; + unlink_executor(exec); + } + exec = next; + } +} + +/* Invalidate all executors + */ +void +_Py_Executors_InvalidateAll(PyInterpreterState *interp) +{ + /* Walk the list of executors */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + assert(exec->vm_data.valid); + _PyExecutorObject *next = exec->vm_data.links.next; + exec->vm_data.links.next = NULL; + exec->vm_data.links.previous = NULL; + exec->vm_data.valid = false; + exec->vm_data.linked = false; + exec = next; + } + interp->executor_list_head = NULL; +} diff --git a/Python/pystate.c b/Python/pystate.c index 570b5242600c0c..4a887216d1a4d7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -708,6 +708,7 @@ init_interpreter(PyInterpreterState *interp, interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold; interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold; interp->next_func_version = 1; + interp->executor_list_head = NULL; if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. 
*/ interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp); From 79bda49a5bc2534bed6b2fd7e59161ebcddb6f12 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Oct 2023 12:33:57 +0100 Subject: [PATCH 3/9] Remove unrelated change --- Include/cpython/code.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 25d12b33a65c1a..45b09a1265df80 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -280,8 +280,7 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i #define PY_FOREACH_CODE_EVENT(V) \ V(CREATE) \ - V(DESTROY) \ - V(INSTRUMENT) + V(DESTROY) typedef enum { #define PY_DEF_EVENT(op) PY_CODE_EVENT_##op, From 76dad942cdfaf3baa224048517dc56053e4405e0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Oct 2023 14:24:28 +0100 Subject: [PATCH 4/9] Tidier logic and add explanatory comment --- Include/cpython/optimizer.h | 6 ++-- Python/optimizer.c | 60 +++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index f5b0a4169cad17..505e5476895e80 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -11,9 +11,11 @@ typedef struct _PyExecutorLinkListNode { struct _PyExecutorObject *previous; } _PyExecutorLinkListNode; -/* Bloom filter with k = 6, m = 256 */ +/* Bloom filter with m = 256 */ +#define BLOOM_FILTER_WORDS 8 + typedef struct _bloom_filter { - uint32_t bits[8]; + uint32_t bits[BLOOM_FILTER_WORDS]; } _PyBloomFilter; typedef struct { diff --git a/Python/optimizer.c b/Python/optimizer.c index 6eecc9f99e0d54..a095d8d341ad12 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -953,10 +953,38 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) * Executor management ****************************************/ -static uint16_t +/* We use a bloomfilter with k = 6, m = 256 + * The choice of k and the following constants + * could do with a more rigourous analysis, + * but here is a simple analysis: + * + * We want to keep the false positive rate low. + * For n = 5 (a trace depends on 5 objects), + * we expect 30 bits set, giving a false positive + * rate of (30/256)**6 == 2.5e-6 which is plenty + * good enough. + * + * However with n = 10 we expect 60 bits set (worst case), + * giving a false positive of (60/256)**6 == 0.0001 + * + * We choose k = 6, rather than a higher number as + * it means the false positive rate goes slower for high n. + * + * n = 15, k = 6 => fp = 0.18% + * n = 15, k = 8 => fp = 0.23% + * n = 20, k = 6 => fp = 1.1% + * n = 20, k = 8 => fp = 2.3% + * + * The above analysis assumes perfect hash functions, + * but we don't have those, so it is approximate. 
+ */ + +#define K 6 + +static uint32_t address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { assert(ptr != NULL); - uint16_t uhash = seed; + uint32_t uhash = seed; uintptr_t addr = (uintptr_t)ptr; for (int i = 0; i < SIZEOF_VOID_P; i++) { uhash ^= addr & 255; @@ -966,31 +994,30 @@ address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { return uhash; } -static const uint32_t multipliers[3] = { +static const uint32_t multipliers[2] = { _PyHASH_MULTIPLIER, 110351524, - 48271 }; -static const uint32_t seeds[3] = { +static const uint32_t seeds[2] = { 20221211, 2147483647, - 1051, }; static void address_to_hashes(void *ptr, _PyBloomFilter *bloom) { - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { bloom->bits[i] = 0; } /* Set k (6) bits */ - for (int i = 0; i < 3; i++) { - uint16_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); - int bit0 = hash & 255; - int bit1 = hash >> 8; - bloom->bits[bit0 >> 5] |= (1 << (bit0&31)); - bloom->bits[bit1 >> 5] |= (1 << (bit1&31)); + for (int i = 0; i < 2; i++) { + uint32_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); + for (int j = 0; j < (K/2); j++) { + uint8_t bits = hash & 255; + bloom->bits[bits >> 5] |= (1 << (bits&31)); + hash >>= 8; + } } } @@ -999,7 +1026,7 @@ add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) { _PyBloomFilter hashes; address_to_hashes(obj, &hashes); - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { bloom->bits[i] |= hashes.bits[i]; } } @@ -1007,7 +1034,7 @@ add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) static bool bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes) { - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { if ((bloom->bits[i] & hashes->bits[i]) != hashes->bits[i]) { return false; } @@ -1067,7 +1094,7 @@ void _Py_ExecutorInit(_PyExecutorObject *executor) { executor->vm_data.valid = true; - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { executor->vm_data.bloom.bits[i] = 0; } link_executor(executor); @@ -1096,6 +1123,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) _PyBloomFilter hashes; address_to_hashes(obj, &hashes); /* Walk the list of executors */ + /* TO DO -- Use a tree to avoid traversing as many objects */ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; From 60306d818b2e3b1ff2d4d87865762964c9bf23a1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Oct 2023 00:50:18 +0100 Subject: [PATCH 5/9] Tidy up code and add test --- Include/cpython/optimizer.h | 9 ++++- Lib/test/test_capi/test_misc.py | 43 +++++++++++++------- Python/instrumentation.c | 2 + Python/optimizer.c | 69 ++++++++++++++++++++------------- 4 files changed, 82 insertions(+), 41 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 505e5476895e80..2a5251b3ecb02a 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -11,7 +11,9 @@ typedef struct _PyExecutorLinkListNode { struct _PyExecutorObject *previous; } _PyExecutorLinkListNode; -/* Bloom filter with m = 256 */ + +/* Bloom filter with m = 256 + * https://en.wikipedia.org/wiki/Bloom_filter */ #define BLOOM_FILTER_WORDS 8 typedef struct _bloom_filter { @@ -61,10 +63,13 @@ _PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_ extern _PyOptimizerObject _PyOptimizer_Default; -void 
_Py_ExecutorInit(_PyExecutorObject *); +void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *); void _Py_ExecutorClear(_PyExecutorObject *); +void _Py_BloomFilter_Init(_PyBloomFilter *); +void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj); +extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp); /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index cdb251e7a64c10..a68c7eb87e0a92 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2387,6 +2387,20 @@ def testfunc(x): self.assertEqual(code, replace_code) self.assertEqual(hash(code), hash(replace_code)) + +def get_first_executor(func): + code = func.__code__ + co_code = code.co_code + JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"] + for i in range(0, len(co_code), 2): + if co_code[i] == JUMP_BACKWARD: + try: + return _testinternalcapi.get_executor(code, i) + except ValueError: + pass + return None + + class TestExecutorInvalidation(unittest.TestCase): def setUp(self): @@ -2431,19 +2445,22 @@ def f{n}(): for exe in executors[:i]: self.assertTrue(exe.valid) - -def get_first_executor(func): - code = func.__code__ - co_code = code.co_code - JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"] - for i in range(0, len(co_code), 2): - if co_code[i] == JUMP_BACKWARD: - try: - return _testinternalcapi.get_executor(code, i) - except ValueError: - pass - return None - + def test_uop_optimizer_invalidation(self): + # Generate a new function at each call + ns = {} + exec(textwrap.dedent(""" + def f(): + for i in range(1000): + pass + """), ns, ns) + f = ns['f'] + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + f() + exe = get_first_executor(f) + self.assertTrue(exe.valid) + _testinternalcapi.invalidate_executors(f.__code__) + self.assertFalse(exe.valid) class TestUops(unittest.TestCase): diff --git a/Python/instrumentation.c b/Python/instrumentation.c index c62803c9708aba..4bb57a621e37e8 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1804,6 +1804,7 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) return -1; } set_global_version(interp, new_version); + _Py_Executors_InvalidateAll(interp); return instrument_all_executing_code_objects(interp); } @@ -1833,6 +1834,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent /* Force instrumentation update */ code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT; } + _Py_Executors_InvalidateDependency(interp, code); if (_Py_Instrument(code, interp)) { return -1; } diff --git a/Python/optimizer.c b/Python/optimizer.c index a095d8d341ad12..30248f8d60747e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -268,7 +268,9 @@ counter_optimize( executor->optimizer = (_PyCounterOptimizerObject *)self; executor->next_instr = instr; *exec_ptr = (_PyExecutorObject *)executor; - _Py_ExecutorInit((_PyExecutorObject *)executor); + _PyBloomFilter empty; + _Py_BloomFilter_Init(&empty); + _Py_ExecutorInit((_PyExecutorObject *)executor, &empty); return 1; } @@ -367,6 +369,12 @@ PySequenceMethods uop_as_sequence = { .sq_item = (ssizeargfunc)uop_item, }; + +static PyMemberDef uop_members[] = { + { "valid", Py_T_UBYTE, offsetof(_PyUOpExecutorObject, base.vm_data.valid), 
Py_READONLY, "is valid?" }, + { NULL } +}; + static PyTypeObject UOpExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uop_executor", @@ -375,6 +383,7 @@ static PyTypeObject UOpExecutor_Type = { .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)uop_dealloc, .tp_as_sequence = &uop_as_sequence, + .tp_members = uop_members, }; static int @@ -410,9 +419,11 @@ translate_bytecode_to_trace( PyCodeObject *code, _Py_CODEUNIT *instr, _PyUOpInstruction *trace, - int buffer_size) + int buffer_size, + _PyBloomFilter *dependencies) { PyCodeObject *initial_code = code; + _Py_BloomFilter_Add(dependencies, initial_code); _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; int max_length = buffer_size; @@ -738,6 +749,7 @@ translate_bytecode_to_trace( // Increment IP to the return address instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); + _Py_BloomFilter_Add(dependencies, new_code); code = new_code; instr = _PyCode_CODE(code); DPRINTF(2, @@ -896,8 +908,10 @@ uop_optimize( _PyExecutorObject **exec_ptr, int curr_stackentries) { + _PyBloomFilter dependencies; + _Py_BloomFilter_Init(&dependencies); _PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; - int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH); + int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH, &dependencies); if (trace_length <= 0) { // Error or nothing translated return trace_length; @@ -914,7 +928,7 @@ uop_optimize( } executor->base.execute = _PyUopExecute; memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); - _Py_ExecutorInit((_PyExecutorObject *)executor); + _Py_ExecutorInit((_PyExecutorObject *)executor, &dependencies); *exec_ptr = (_PyExecutorObject *)executor; return 1; } @@ -936,6 +950,8 @@ static PyTypeObject UOpOptimizer_Type = { PyObject * PyUnstable_Optimizer_NewUOpOptimizer(void) { + PyType_Ready(&UOpExecutor_Type); + PyType_Ready(&UOpOptimizer_Type); _PyOptimizerObject *opt = PyObject_New(_PyOptimizerObject, &UOpOptimizer_Type); if (opt == NULL) { return NULL; @@ -968,19 +984,25 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) * giving a false positive of (60/256)**6 == 0.0001 * * We choose k = 6, rather than a higher number as - * it means the false positive rate goes slower for high n. + * it means the false positive rate grows slower for high n. * + * n = 5, k = 6 => fp = 2.6e-6 + * n = 5, k = 8 => fp = 3.5e-7 + * n = 10, k = 6 => fp = 1.6e-4 + * n = 10, k = 8 => fp = 0.9e-4 * n = 15, k = 6 => fp = 0.18% * n = 15, k = 8 => fp = 0.23% * n = 20, k = 6 => fp = 1.1% * n = 20, k = 8 => fp = 2.3% * * The above analysis assumes perfect hash functions, - * but we don't have those, so it is approximate. + * but those don't exist, so the real false positive + * rates may be worse. 
*/ #define K 6 +/* TO DO -- Use more modern hash functions with better distribution of bits */ static uint32_t address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { assert(ptr != NULL); @@ -1004,12 +1026,17 @@ static const uint32_t seeds[2] = { 2147483647, }; -static void -address_to_hashes(void *ptr, _PyBloomFilter *bloom) +void +_Py_BloomFilter_Init(_PyBloomFilter *bloom) { for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { bloom->bits[i] = 0; } +} + +void +_Py_BloomFilter_Add(_PyBloomFilter *bloom, void *ptr) +{ /* Set k (6) bits */ for (int i = 0; i < 2; i++) { uint32_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); @@ -1021,16 +1048,6 @@ address_to_hashes(void *ptr, _PyBloomFilter *bloom) } } -static void -add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) -{ - _PyBloomFilter hashes; - address_to_hashes(obj, &hashes); - for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { - bloom->bits[i] |= hashes.bits[i]; - } -} - static bool bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes) { @@ -1091,11 +1108,11 @@ unlink_executor(_PyExecutorObject *executor) /* This must be called by optimizers before using the executor */ void -_Py_ExecutorInit(_PyExecutorObject *executor) +_Py_ExecutorInit(_PyExecutorObject *executor, _PyBloomFilter *dependency_set) { executor->vm_data.valid = true; for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { - executor->vm_data.bloom.bits[i] = 0; + executor->vm_data.bloom.bits[i] = dependency_set->bits[i]; } link_executor(executor); } @@ -1111,7 +1128,7 @@ void _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) { assert(executor->vm_data.valid = true); - add_to_bloom_filter(&executor->vm_data.bloom, obj); + _Py_BloomFilter_Add(&executor->vm_data.bloom, obj); } /* Invalidate all executors that depend on `obj` @@ -1120,14 +1137,15 @@ _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) void _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) { - _PyBloomFilter hashes; - address_to_hashes(obj, &hashes); + _PyBloomFilter obj_filter; + _Py_BloomFilter_Init(&obj_filter); + _Py_BloomFilter_Add(&obj_filter, obj); /* Walk the list of executors */ /* TO DO -- Use a tree to avoid traversing as many objects */ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; - if (bloom_filter_may_contain(&exec->vm_data.bloom, &hashes)) { + if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { exec->vm_data.valid = false; unlink_executor(exec); } @@ -1135,8 +1153,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) } } -/* Invalidate all executors - */ +/* Invalidate all executors */ void _Py_Executors_InvalidateAll(PyInterpreterState *interp) { From 0436c1aeca5847fed2c4c6aa7a21d2b4018fc33e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Oct 2023 01:15:05 +0100 Subject: [PATCH 6/9] Remove _JUMP_IF_INVALID. Save for next PR. 
--- Include/internal/pycore_opcode_metadata.h | 7 ------- Python/abstract_interp_cases.c.h | 4 ---- Python/bytecodes.c | 6 ------ Python/executor_cases.c.h | 7 ------- 4 files changed, 24 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 09fa1090a5c25f..8ef398c5db09f6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -89,7 +89,6 @@ #define _JUMP_TO_TOP 361 #define _SAVE_CURRENT_IP 362 #define _INSERT 363 -#define _JUMP_IF_INVALID 364 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -661,8 +660,6 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case _INSERT: return oparg + 1; - case _JUMP_IF_INVALID: - return 0; default: return -1; } @@ -1239,8 +1236,6 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case _INSERT: return oparg + 1; - case _JUMP_IF_INVALID: - return 0; default: return -1; } @@ -1597,7 +1592,6 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_SAVE_CURRENT_IP] = { true, INSTR_FMT_IX, 0 }, [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_JUMP_IF_INVALID] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, }; #endif // NEED_OPCODE_METADATA @@ -1820,7 +1814,6 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_SAVE_CURRENT_IP] = "_SAVE_CURRENT_IP", [_INSERT] = "_INSERT", - [_JUMP_IF_INVALID] = "_JUMP_IF_INVALID", }; #endif // NEED_OPCODE_METADATA diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index b7eb0b2c49c16d..04fe07fad39937 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -932,7 +932,3 @@ PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true); break; } - - case _JUMP_IF_INVALID: { - break; - } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f164a3db806e87..6482252eec9fb9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3973,12 +3973,6 @@ dummy_func( memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); } - op(_JUMP_IF_INVALID, (--)) { - if (!self->base.vm_data.valid) { - pc = oparg; - } - } - // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d883b34801833a..c6eeee4925ef1f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3301,10 +3301,3 @@ stack_pointer[-1 - oparg] = top; break; } - - case _JUMP_IF_INVALID: { - if (!self->base.vm_data.valid) { - pc = oparg; - } - break; - } From 77ef6581d27824f9ea98bbb54a35d7418e1ad1f7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 23 Oct 2023 11:55:42 +0100 Subject: [PATCH 7/9] Address some review comments --- Lib/test/test_capi/test_misc.py | 2 +- Objects/codeobject.c | 2 -- Python/optimizer.c | 12 +++++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index a68c7eb87e0a92..107f26f7888b83 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2427,7 +2427,7 @@ def f{n}(): for f in funcs: f() - executors = [ get_first_executor(f) for f in funcs] + executors = [get_first_executor(f) for f in funcs] # Set things up so each executor depends on the objects # with an equal or lower index. 
for i, exe in enumerate(executors): diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 9e5e84b2d8d2f0..f662b8e354bb1e 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -63,8 +63,6 @@ notify_code_watchers(PyCodeEvent event, PyCodeObject *co) } } - - int PyCode_AddWatcher(PyCode_WatchCallback callback) { diff --git a/Python/optimizer.c b/Python/optimizer.c index 30248f8d60747e..ab5ab5bf1562f3 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1094,11 +1094,13 @@ unlink_executor(_PyExecutorObject *executor) if (next != NULL) { next->vm_data.links.previous = prev; } - else if (prev == NULL) { - // Both are NULL, so this must be the list head. - PyInterpreterState *interp = PyInterpreterState_Get(); - assert(interp->executor_list_head == executor); - interp->executor_list_head = executor->vm_data.links.next; + else { + if (prev == NULL) { + // Both next and prev are NULL, so this must be the list head. + PyInterpreterState *interp = PyInterpreterState_Get(); + assert(interp->executor_list_head == executor); + interp->executor_list_head = executor->vm_data.links.next; + } } if (prev != NULL) { prev->vm_data.links.next = next; From 41f54b8b1bab830b3b5b9076b463db4225ed9fbf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 23 Oct 2023 12:13:20 +0100 Subject: [PATCH 8/9] Use a single 64 bit hash function and more clearly document how the bloom filter works. --- Python/optimizer.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index ab5ab5bf1562f3..8de572ee26f149 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1002,30 +1002,22 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) #define K 6 +#define SEED 20221211 + /* TO DO -- Use more modern hash functions with better distribution of bits */ -static uint32_t -address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { +static uint64_t +address_to_hash(void *ptr) { assert(ptr != NULL); - uint32_t uhash = seed; + uint64_t uhash = SEED; uintptr_t addr = (uintptr_t)ptr; for (int i = 0; i < SIZEOF_VOID_P; i++) { uhash ^= addr & 255; - uhash *= multiplier; + uhash *= (uint64_t)_PyHASH_MULTIPLIER; addr >>= 8; } return uhash; } -static const uint32_t multipliers[2] = { - _PyHASH_MULTIPLIER, - 110351524, -}; - -static const uint32_t seeds[2] = { - 20221211, - 2147483647, -}; - void _Py_BloomFilter_Init(_PyBloomFilter *bloom) { @@ -1034,17 +1026,20 @@ _Py_BloomFilter_Init(_PyBloomFilter *bloom) } } +/* We want K hash functions that each set 1 bit. + * A hash function that sets 1 bit in M bits can be trivially + * derived from a log2(M) bit hash function. + * So we extract 8 (log2(256)) bits at a time from + * the 64bit hash. 
*/ void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *ptr) { - /* Set k (6) bits */ - for (int i = 0; i < 2; i++) { - uint32_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); - for (int j = 0; j < (K/2); j++) { - uint8_t bits = hash & 255; - bloom->bits[bits >> 5] |= (1 << (bits&31)); - hash >>= 8; - } + uint64_t hash = address_to_hash(ptr); + assert(K <= 8); + for (int i = 0; i < K; i++) { + uint8_t bits = hash & 255; + bloom->bits[bits >> 5] |= (1 << (bits&31)); + hash >>= 8; } } From 11985eaea519d2a4bb7d206152edf5f2db9fd625 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 23 Oct 2023 13:59:02 +0100 Subject: [PATCH 9/9] Fix unlinking of executors --- Python/optimizer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 8de572ee26f149..052f2689f39b1a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1075,6 +1075,8 @@ link_executor(_PyExecutorObject *executor) head->vm_data.links.next = executor; } executor->vm_data.linked = true; + /* executor_list_head must be first in list */ + assert(interp->executor_list_head->vm_data.links.previous == NULL); } static void @@ -1089,17 +1091,15 @@ unlink_executor(_PyExecutorObject *executor) if (next != NULL) { next->vm_data.links.previous = prev; } - else { - if (prev == NULL) { - // Both next and prev are NULL, so this must be the list head. - PyInterpreterState *interp = PyInterpreterState_Get(); - assert(interp->executor_list_head == executor); - interp->executor_list_head = executor->vm_data.links.next; - } - } if (prev != NULL) { prev->vm_data.links.next = next; } + else { + // prev == NULL implies that executor is the list head + PyInterpreterState *interp = PyInterpreterState_Get(); + assert(interp->executor_list_head == executor); + interp->executor_list_head = next; + } executor->vm_data.linked = false; }
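
For readers following the series as a whole, the final shape of the mechanism (after patch 8) is: every executor carries a 256-bit Bloom filter of the objects it depends on, filled by hashing each dependency's address and setting K = 6 bits per dependency; _Py_Executors_InvalidateDependency then builds a one-object filter for the changed object and walks the per-interpreter executor list, marking any executor whose filter may contain that object as invalid and unlinking it. With n dependencies at most 6n bits are set, so the false-positive rate is roughly (6n/256)**6, which is the arithmetic behind the figures in the comment added in patch 4 (n = 5 gives about 2.5e-6). The sketch below is a minimal, self-contained illustration of that filter only, not CPython code: the demo_* names are invented for this example, the multiplier is assumed to be the value of _PyHASH_MULTIPLIER (1000003), and the executor linked list and vm_data.valid flag are left out.

    /* Standalone sketch of the dependency filter built up in this series.
     * Assumptions: 256-bit filter (8 x uint32_t), K = 6 probes, and the same
     * multiplicative pointer hash as patch 8; demo_* names are illustrative
     * and do not exist in CPython. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BLOOM_FILTER_WORDS 8
    #define K 6
    #define SEED 20221211
    #define MULTIPLIER 1000003ULL   /* assumed value of _PyHASH_MULTIPLIER */

    typedef struct { uint32_t bits[BLOOM_FILTER_WORDS]; } demo_bloom;

    static uint64_t
    demo_hash_pointer(void *ptr)
    {
        /* Mix the pointer's bytes into a 64-bit hash, one byte at a time. */
        uint64_t uhash = SEED;
        uintptr_t addr = (uintptr_t)ptr;
        for (size_t i = 0; i < sizeof(void *); i++) {
            uhash ^= addr & 255;
            uhash *= MULTIPLIER;
            addr >>= 8;
        }
        return uhash;
    }

    static void
    demo_bloom_add(demo_bloom *bloom, void *ptr)
    {
        /* Each 8-bit slice of the hash selects one of the 256 bits to set. */
        uint64_t hash = demo_hash_pointer(ptr);
        for (int i = 0; i < K; i++) {
            uint8_t bit = hash & 255;
            bloom->bits[bit >> 5] |= (uint32_t)1 << (bit & 31);
            hash >>= 8;
        }
    }

    static bool
    demo_bloom_may_contain(const demo_bloom *set, const demo_bloom *obj)
    {
        /* Every bit set for obj must also be set in the dependency filter. */
        for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
            if ((set->bits[i] & obj->bits[i]) != obj->bits[i]) {
                return false;
            }
        }
        return true;
    }

    int
    main(void)
    {
        int code_a, code_b;             /* stand-ins for dependency objects */
        demo_bloom deps = {{0}};

        demo_bloom_add(&deps, &code_a); /* "executor depends on code_a" */

        demo_bloom probe_a = {{0}}, probe_b = {{0}};
        demo_bloom_add(&probe_a, &code_a);
        demo_bloom_add(&probe_b, &code_b);

        printf("depends on code_a? %d\n", demo_bloom_may_contain(&deps, &probe_a));
        printf("depends on code_b? %d (rarely a false positive)\n",
               demo_bloom_may_contain(&deps, &probe_b));
        return 0;
    }

Note that the series only covers recording dependencies and invalidating executors; the interpreter-side check that would stop a stale executor from running (_JUMP_IF_INVALID) is removed again in patch 6 and deferred to a follow-up PR.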