GH-115506: Improve handling of constants in tier two #124809

Closed
wants to merge 11 commits into from
Include/internal/pycore_opcode_metadata.h (16 changes: 8 additions & 8 deletions)
(Generated file; diff not rendered.)

Include/internal/pycore_optimizer.h (3 changes: 2 additions & 1 deletion)
@@ -76,6 +76,7 @@ typedef struct _PyExecutorObject {
size_t jit_size;
void *jit_code;
void *jit_side_entry;
PyObject *refs;
_PyExitData exits[1];
} _PyExecutorObject;

@@ -144,7 +145,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);

int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);
_PyBloomFilter *dependencies, PyObject *new_refs);

extern PyTypeObject _PyCounterExecutor_Type;
extern PyTypeObject _PyCounterOptimizer_Type;
Lib/test/test_capi/test_opt.py (3 changes: 2 additions & 1 deletion)

@@ -991,7 +991,8 @@ def testfunc(n):
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertNotIn("_GUARD_BOTH_INT", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_BINARY_OP_ADD_INT", uops)
self.assertIn("_LOAD_CONST_INLINE_BORROW", uops)
# Try again, but between the runs, set the global to a float.
# This should result in no executor the second time.
ns = {}
@@ -0,0 +1 @@
Improve constant propagation and folding in JIT-compiled code.
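For context, here is a minimal sketch (not part of the PR) of the behaviour the updated test above checks: once both operands of an integer add are known constants, the tier-two optimizer should drop _GUARD_BOTH_INT and _BINARY_OP_ADD_INT and emit a _LOAD_CONST_INLINE_BORROW instead. The helpers get_first_executor and get_opnames are assumed from Lib/test/test_capi/test_opt.py, the loop count is illustrative, and this only applies on a build with the tier-two optimizer/JIT enabled.

```python
CONST = 3  # module-level int the optimizer can treat as a constant

def testfunc(n):
    total = 0
    for _ in range(n):
        total += CONST + 1  # both operands known -> candidate for folding
    return total

testfunc(64)  # warm up enough for a tier-two executor to be created

ex = get_first_executor(testfunc)           # assumed helper from test_opt.py
uops = get_opnames(ex)
assert "_BINARY_OP_ADD_INT" not in uops     # folded away...
assert "_LOAD_CONST_INLINE_BORROW" in uops  # ...into an inline constant load
```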
Python/bytecodes.c (16 changes: 8 additions & 8 deletions)

@@ -495,11 +495,11 @@ dummy_func(
}

macro(BINARY_OP_MULTIPLY_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_MULTIPLY_INT;
NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_MULTIPLY_INT;
macro(BINARY_OP_ADD_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_ADD_INT;
NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_ADD_INT;
macro(BINARY_OP_SUBTRACT_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;
NOP + _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;

op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
@@ -558,11 +558,11 @@ dummy_func(
}

macro(BINARY_OP_MULTIPLY_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_MULTIPLY_FLOAT;
NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_MULTIPLY_FLOAT;
macro(BINARY_OP_ADD_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_ADD_FLOAT;
NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_ADD_FLOAT;
macro(BINARY_OP_SUBTRACT_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;
NOP + _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;

op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
@@ -585,7 +585,7 @@ dummy_func(
}

macro(BINARY_OP_ADD_UNICODE) =
_GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE;
NOP + _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE;

// This is a subtle one. It's a super-instruction for
// BINARY_OP_ADD_UNICODE followed by STORE_FAST
@@ -634,7 +634,7 @@ dummy_func(
}

macro(BINARY_OP_INPLACE_ADD_UNICODE) =
_GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_INPLACE_ADD_UNICODE;
NOP + NOP + _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_INPLACE_ADD_UNICODE;

family(BINARY_SUBSCR, INLINE_CACHE_ENTRIES_BINARY_SUBSCR) = {
BINARY_SUBSCR_DICT,
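As an aside on the "subtle" BINARY_OP_INPLACE_ADD_UNICODE comment above, this is an illustrative Python snippet (not from the PR) of the pattern that super-instruction targets: a unicode `+=` whose result is stored straight back into the same local.

```python
def build(parts):
    s = ""
    for p in parts:
        s += p  # BINARY_OP_ADD_UNICODE immediately followed by STORE_FAST of `s`
    return s
```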
Python/generated_cases.c.h (27 changes: 27 additions & 0 deletions)
(Generated file; diff not rendered.)

Python/optimizer.c (39 changes: 38 additions & 1 deletion)

@@ -257,6 +257,7 @@ uop_dealloc(_PyExecutorObject *self) {
_PyObject_GC_UNTRACK(self);
assert(self->vm_data.code == NULL);
unlink_executor(self);
Py_CLEAR(self->refs);
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
@@ -360,6 +361,7 @@ static int
executor_traverse(PyObject *o, visitproc visit, void *arg)
{
_PyExecutorObject *executor = (_PyExecutorObject *)o;
Py_VISIT(executor->refs);
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_VISIT(executor->exits[i].executor);
}
@@ -1066,6 +1068,7 @@ allocate_executor(int exit_count, int length)
res->trace = (_PyUOpInstruction *)(res->exits + exit_count);
res->code_size = length;
res->exit_count = exit_count;
res->refs = NULL;
return res;
}

@@ -1247,12 +1250,19 @@ uop_optimize(
}
assert(length < UOP_MAX_TRACE_LENGTH);
OPT_STAT_INC(traces_created);
// These are any references that were created during optimization, and need
// to be kept alive until we build the executor's refs tuple:
PyObject *new_refs = PyList_New(0);
if (new_refs == NULL) {
return -1;
}
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
length = _Py_uop_analyze_and_optimize(frame, buffer,
length,
curr_stackentries, &dependencies);
curr_stackentries, &dependencies, new_refs);
if (length <= 0) {
Py_DECREF(new_refs);
return length;
}
}
@@ -1274,13 +1284,39 @@
assert(_PyOpcode_uop_name[buffer[pc].opcode]);
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
}
// We *might* want to de-duplicate these. In addition to making sure we do
// so in a way that preserves "equal" constants with different types (see
// _PyCode_ConstantKey), we *also* need to be careful to compare unknown
objects by identity, since we don't want to invoke arbitrary code in a
// __hash__/__eq__ implementation. It might be more trouble than it's worth:
int refs_needed = 0;
for (int i = 0; i < length; i++) {
if (buffer[i].opcode == _LOAD_CONST_INLINE) {
refs_needed++;
}
}
PyObject *refs = PyTuple_New(refs_needed);
if (refs == NULL) {
Py_DECREF(new_refs);
return -1;
}
int j = 0;
for (int i = 0; i < length; i++) {
if (buffer[i].opcode == _LOAD_CONST_INLINE) {
PyTuple_SET_ITEM(refs, j++, Py_NewRef(buffer[i].operand));
}
}
Py_DECREF(new_refs);
assert(j == refs_needed);
OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
length = prepare_for_execution(buffer, length);
assert(length <= UOP_MAX_TRACE_LENGTH);
_PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies);
if (executor == NULL) {
Py_DECREF(refs);
return -1;
}
executor->refs = refs;
assert(length <= UOP_MAX_TRACE_LENGTH);
*exec_ptr = executor;
return 1;
@@ -1584,6 +1620,7 @@ executor_clear(_PyExecutorObject *executor)
* free the executor unless we hold a strong reference to it
*/
Py_INCREF(executor);
Py_CLEAR(executor->refs);
for (uint32_t i = 0; i < executor->exit_count; i++) {
executor->exits[i].temperature = initial_unreachable_backoff_counter();
Py_CLEAR(executor->exits[i].executor);
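The new comment in uop_optimize() above raises the idea of de-duplicating the collected constants while comparing unknown objects only by identity. Below is a hedged sketch of what that could look like (explicitly not part of this PR, which leaves duplicates in place): pointer comparison avoids running arbitrary __hash__/__eq__ code, and the tuple would still need shrinking afterwards (e.g. via _PyTuple_Resize), which is omitted here.

```c
/* Sketch only: identity-based de-duplication of executor constants.
 * O(n**2), but n is just the number of _LOAD_CONST_INLINE uops in the trace. */
static Py_ssize_t
add_unique_ref(PyObject *refs, Py_ssize_t count, PyObject *obj)
{
    for (Py_ssize_t i = 0; i < count; i++) {
        if (PyTuple_GET_ITEM(refs, i) == obj) {
            return count;  /* already kept alive by the executor */
        }
    }
    PyTuple_SET_ITEM(refs, count, Py_NewRef(obj));  /* tuple steals the new reference */
    return count + 1;
}
```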
Python/optimizer_analysis.c (36 changes: 27 additions & 9 deletions)

@@ -300,10 +300,20 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,

#define GETLOCAL(idx) ((ctx->frame->locals[idx]))

#define REPLACE_OP(INST, OP, ARG, OPERAND) \
INST->opcode = OP; \
INST->oparg = ARG; \
INST->operand = OPERAND;
#define REPLACE_OP(INST, OP, ARG, OPERAND) \
do { \
(INST)->opcode = (OP); \
(INST)->oparg = (ARG); \
(INST)->operand = (OPERAND); \
} while (0)

#define REPLACE_OP_WITH_LOAD_CONST(INST, CONST) \
do { \
PyObject *o = (CONST); \
int opcode = _Py_IsImmortal(o) ? _LOAD_CONST_INLINE_BORROW \
: _LOAD_CONST_INLINE; \
REPLACE_OP((INST), opcode, 0, (uintptr_t)o); \
} while (0)

/* Shortened forms for convenience, used in optimizer_bytecodes.c */
#define sym_is_not_null _Py_uop_sym_is_not_null
@@ -392,7 +402,8 @@ optimize_uops(
_PyUOpInstruction *trace,
int trace_len,
int curr_stacklen,
_PyBloomFilter *dependencies
_PyBloomFilter *dependencies,
PyObject *new_refs
)
{

@@ -524,6 +535,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
last_set_ip = pc;
break;
case _POP_TOP:
case _POP_TOP_LOAD_CONST_INLINE_BORROW:
{
_PyUOpInstruction *last = &buffer[pc-1];
while (last->opcode == _NOP) {
@@ -535,9 +547,14 @@
last->opcode == _COPY
) {
last->opcode = _NOP;
buffer[pc].opcode = _NOP;
if (buffer[pc].opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) {
buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW;
}
else {
buffer[pc].opcode = _NOP;
}
}
if (last->opcode == _REPLACE_WITH_TRUE) {
if (last->opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) {
last->opcode = _NOP;
}
break;
@@ -580,7 +597,8 @@ _Py_uop_analyze_and_optimize(
_PyUOpInstruction *buffer,
int length,
int curr_stacklen,
_PyBloomFilter *dependencies
_PyBloomFilter *dependencies,
PyObject *new_refs
)
{
OPT_STAT_INC(optimizer_attempts);
@@ -592,7 +610,7 @@

length = optimize_uops(
_PyFrame_GetCode(frame), buffer,
length, curr_stacklen, dependencies);
length, curr_stacklen, dependencies, new_refs);

if (length <= 0) {
return length;
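To tie the pieces together, here is a hedged sketch of how a constant-folding rule on the optimizer_bytecodes.c side (not included in this excerpt) could use the new REPLACE_OP_WITH_LOAD_CONST macro together with the new_refs list threaded through _Py_uop_analyze_and_optimize(). The names sym_is_const, sym_get_const, sym_matches_type, sym_new_const, sym_new_type, ctx, and this_instr are the shortened symbol helpers and DSL conventions referenced in optimizer_analysis.c; the exact rule bodies in the PR may differ.

```c
op(_BINARY_OP_ADD_INT, (left, right -- res)) {
    if (sym_is_const(left) && sym_is_const(right) &&
        sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type))
    {
        PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left),
                                     (PyLongObject *)sym_get_const(right));
        if (temp == NULL) {
            goto error;
        }
        /* Keep the freshly created int alive until the executor's refs tuple
         * (or an immortal borrow) takes over: */
        if (PyList_Append(new_refs, temp) < 0) {
            Py_DECREF(temp);
            goto error;
        }
        Py_DECREF(temp);
        res = sym_new_const(ctx, temp);
        /* Emits _LOAD_CONST_INLINE_BORROW for immortals, _LOAD_CONST_INLINE otherwise: */
        REPLACE_OP_WITH_LOAD_CONST(this_instr, temp);
    }
    else {
        res = sym_new_type(ctx, &PyLong_Type);
    }
}
```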