From 385a66368a1bb7832420bc72fef866e28945dd02 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 19 Apr 2025 04:58:15 +0800 Subject: [PATCH 1/2] Eliminate guards for constant CALL_BUILTIN_O/FAST --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 370 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 19 +- Lib/test/test_capi/test_opt.py | 82 +++-- Python/bytecodes.c | 37 ++- Python/executor_cases.c.h | 149 ++++++++- Python/generated_cases.c.h | 45 ++- Python/optimizer_bytecodes.c | 26 ++ Python/optimizer_cases.c.h | 32 ++ 9 files changed, 516 insertions(+), 248 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 521f7a92cf08c4..b77860f51a9138 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1334,9 +1334,9 @@ _PyOpcode_macro_expansion[256] = { [CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 9, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_BOUND_METHOD_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION, 2, 1 }, { _EXPAND_METHOD, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_CLASS] = { .nuops = 2, .uops = { { _CALL_BUILTIN_CLASS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_BUILTIN_FAST] = { .nuops = 2, .uops = { { _CALL_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, + [CALL_BUILTIN_FAST] = { .nuops = 3, .uops = { { _GUARD_CALL_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 2, .uops = { { _CALL_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_BUILTIN_O] = { .nuops = 2, .uops = { { _CALL_BUILTIN_O, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, + [CALL_BUILTIN_O] = { .nuops = 3, .uops = { { _GUARD_CALL_BUILTIN_O, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_O, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { _CALL_INTRINSIC_1, OPARG_SIMPLE, 0 } } }, [CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { _CALL_INTRINSIC_2, OPARG_SIMPLE, 0 } } }, [CALL_ISINSTANCE] = { .nuops = 1, .uops = { { _CALL_ISINSTANCE, OPARG_SIMPLE, 3 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index e9a536919da598..090a43d1208cc2 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -38,119 +38,123 @@ extern "C" { #define _CALL_BUILTIN_FAST 320 #define _CALL_BUILTIN_FAST_WITH_KEYWORDS 321 #define _CALL_BUILTIN_O 322 +#define _CALL_BUILTIN_O_0 323 +#define _CALL_BUILTIN_O_1 324 #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 #define _CALL_ISINSTANCE CALL_ISINSTANCE -#define _CALL_KW_NON_PY 323 +#define _CALL_KW_NON_PY 325 #define _CALL_LEN CALL_LEN #define _CALL_LIST_APPEND CALL_LIST_APPEND -#define _CALL_METHOD_DESCRIPTOR_FAST 324 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 325 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 326 -#define _CALL_METHOD_DESCRIPTOR_O 327 -#define _CALL_NON_PY_GENERAL 328 -#define _CALL_STR_1 329 -#define _CALL_TUPLE_1 330 +#define _CALL_METHOD_DESCRIPTOR_FAST 326 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 327 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 328 +#define _CALL_METHOD_DESCRIPTOR_O 329 +#define _CALL_NON_PY_GENERAL 330 +#define _CALL_STR_1 331 +#define _CALL_TUPLE_1 332 #define _CALL_TYPE_1 CALL_TYPE_1 -#define _CHECK_AND_ALLOCATE_OBJECT 331 -#define _CHECK_ATTR_CLASS 332 -#define _CHECK_ATTR_METHOD_LAZY_DICT 333 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 334 +#define _CHECK_AND_ALLOCATE_OBJECT 333 +#define _CHECK_ATTR_CLASS 334 +#define _CHECK_ATTR_METHOD_LAZY_DICT 335 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 336 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 335 -#define _CHECK_FUNCTION_EXACT_ARGS 336 -#define _CHECK_FUNCTION_VERSION 337 -#define _CHECK_FUNCTION_VERSION_INLINE 338 -#define _CHECK_FUNCTION_VERSION_KW 339 -#define _CHECK_IS_NOT_PY_CALLABLE 340 -#define _CHECK_IS_NOT_PY_CALLABLE_KW 341 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 342 -#define _CHECK_METHOD_VERSION 343 -#define _CHECK_METHOD_VERSION_KW 344 -#define _CHECK_PEP_523 345 -#define _CHECK_PERIODIC 346 -#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 347 -#define _CHECK_STACK_SPACE 348 -#define _CHECK_STACK_SPACE_OPERAND 349 -#define _CHECK_VALIDITY 350 -#define _COMPARE_OP 351 -#define _COMPARE_OP_FLOAT 352 -#define _COMPARE_OP_INT 353 -#define _COMPARE_OP_STR 354 -#define _CONTAINS_OP 355 -#define _CONTAINS_OP_DICT 356 -#define _CONTAINS_OP_SET 357 +#define _CHECK_FUNCTION 337 +#define _CHECK_FUNCTION_EXACT_ARGS 338 +#define _CHECK_FUNCTION_VERSION 339 +#define _CHECK_FUNCTION_VERSION_INLINE 340 +#define _CHECK_FUNCTION_VERSION_KW 341 +#define _CHECK_IS_NOT_PY_CALLABLE 342 +#define _CHECK_IS_NOT_PY_CALLABLE_KW 343 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 344 +#define _CHECK_METHOD_VERSION 345 +#define _CHECK_METHOD_VERSION_KW 346 +#define _CHECK_PEP_523 347 +#define _CHECK_PERIODIC 348 +#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 349 +#define _CHECK_STACK_SPACE 350 +#define _CHECK_STACK_SPACE_OPERAND 351 +#define _CHECK_VALIDITY 352 +#define _COMPARE_OP 353 +#define _COMPARE_OP_FLOAT 354 +#define _COMPARE_OP_INT 355 +#define _COMPARE_OP_STR 356 +#define _CONTAINS_OP 357 +#define _CONTAINS_OP_DICT 358 +#define _CONTAINS_OP_SET 359 #define _CONVERT_VALUE CONVERT_VALUE #define _COPY COPY #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 358 +#define _CREATE_INIT_FRAME 360 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 359 +#define _DEOPT 361 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 360 -#define _DO_CALL_FUNCTION_EX 361 -#define _DO_CALL_KW 362 +#define _DO_CALL 362 +#define _DO_CALL_FUNCTION_EX 363 +#define _DO_CALL_KW 364 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 363 +#define _ERROR_POP_N 365 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 364 -#define _EXPAND_METHOD_KW 365 -#define _FATAL_ERROR 366 +#define _EXPAND_METHOD 366 +#define _EXPAND_METHOD_KW 367 +#define _FATAL_ERROR 368 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 367 -#define _FOR_ITER_GEN_FRAME 368 -#define _FOR_ITER_TIER_TWO 369 +#define _FOR_ITER 369 +#define _FOR_ITER_GEN_FRAME 370 +#define _FOR_ITER_TIER_TWO 371 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 370 -#define _GUARD_DORV_NO_DICT 371 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 372 -#define _GUARD_GLOBALS_VERSION 373 -#define _GUARD_IS_FALSE_POP 374 -#define _GUARD_IS_NONE_POP 375 -#define _GUARD_IS_NOT_NONE_POP 376 -#define _GUARD_IS_TRUE_POP 377 -#define _GUARD_KEYS_VERSION 378 -#define _GUARD_NOS_DICT 379 -#define _GUARD_NOS_FLOAT 380 -#define _GUARD_NOS_INT 381 -#define _GUARD_NOS_LIST 382 -#define _GUARD_NOS_TUPLE 383 -#define _GUARD_NOS_UNICODE 384 -#define _GUARD_NOT_EXHAUSTED_LIST 385 -#define _GUARD_NOT_EXHAUSTED_RANGE 386 -#define _GUARD_NOT_EXHAUSTED_TUPLE 387 -#define _GUARD_TOS_ANY_SET 388 -#define _GUARD_TOS_DICT 389 -#define _GUARD_TOS_FLOAT 390 -#define _GUARD_TOS_INT 391 -#define _GUARD_TOS_LIST 392 -#define _GUARD_TOS_TUPLE 393 -#define _GUARD_TOS_UNICODE 394 -#define _GUARD_TYPE_VERSION 395 -#define _GUARD_TYPE_VERSION_AND_LOCK 396 +#define _GUARD_BINARY_OP_EXTEND 372 +#define _GUARD_CALL_BUILTIN_FAST 373 +#define _GUARD_CALL_BUILTIN_O 374 +#define _GUARD_DORV_NO_DICT 375 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 376 +#define _GUARD_GLOBALS_VERSION 377 +#define _GUARD_IS_FALSE_POP 378 +#define _GUARD_IS_NONE_POP 379 +#define _GUARD_IS_NOT_NONE_POP 380 +#define _GUARD_IS_TRUE_POP 381 +#define _GUARD_KEYS_VERSION 382 +#define _GUARD_NOS_DICT 383 +#define _GUARD_NOS_FLOAT 384 +#define _GUARD_NOS_INT 385 +#define _GUARD_NOS_LIST 386 +#define _GUARD_NOS_TUPLE 387 +#define _GUARD_NOS_UNICODE 388 +#define _GUARD_NOT_EXHAUSTED_LIST 389 +#define _GUARD_NOT_EXHAUSTED_RANGE 390 +#define _GUARD_NOT_EXHAUSTED_TUPLE 391 +#define _GUARD_TOS_ANY_SET 392 +#define _GUARD_TOS_DICT 393 +#define _GUARD_TOS_FLOAT 394 +#define _GUARD_TOS_INT 395 +#define _GUARD_TOS_LIST 396 +#define _GUARD_TOS_TUPLE 397 +#define _GUARD_TOS_UNICODE 398 +#define _GUARD_TYPE_VERSION 399 +#define _GUARD_TYPE_VERSION_AND_LOCK 400 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 397 -#define _INIT_CALL_PY_EXACT_ARGS 398 -#define _INIT_CALL_PY_EXACT_ARGS_0 399 -#define _INIT_CALL_PY_EXACT_ARGS_1 400 -#define _INIT_CALL_PY_EXACT_ARGS_2 401 -#define _INIT_CALL_PY_EXACT_ARGS_3 402 -#define _INIT_CALL_PY_EXACT_ARGS_4 403 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 401 +#define _INIT_CALL_PY_EXACT_ARGS 402 +#define _INIT_CALL_PY_EXACT_ARGS_0 403 +#define _INIT_CALL_PY_EXACT_ARGS_1 404 +#define _INIT_CALL_PY_EXACT_ARGS_2 405 +#define _INIT_CALL_PY_EXACT_ARGS_3 406 +#define _INIT_CALL_PY_EXACT_ARGS_4 407 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -160,163 +164,163 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 404 +#define _IS_NONE 408 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 405 -#define _ITER_CHECK_RANGE 406 -#define _ITER_CHECK_TUPLE 407 -#define _ITER_JUMP_LIST 408 -#define _ITER_JUMP_RANGE 409 -#define _ITER_JUMP_TUPLE 410 -#define _ITER_NEXT_LIST 411 -#define _ITER_NEXT_LIST_TIER_TWO 412 -#define _ITER_NEXT_RANGE 413 -#define _ITER_NEXT_TUPLE 414 -#define _JUMP_TO_TOP 415 +#define _ITER_CHECK_LIST 409 +#define _ITER_CHECK_RANGE 410 +#define _ITER_CHECK_TUPLE 411 +#define _ITER_JUMP_LIST 412 +#define _ITER_JUMP_RANGE 413 +#define _ITER_JUMP_TUPLE 414 +#define _ITER_NEXT_LIST 415 +#define _ITER_NEXT_LIST_TIER_TWO 416 +#define _ITER_NEXT_RANGE 417 +#define _ITER_NEXT_TUPLE 418 +#define _JUMP_TO_TOP 419 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 416 -#define _LOAD_ATTR_CLASS 417 +#define _LOAD_ATTR 420 +#define _LOAD_ATTR_CLASS 421 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 418 -#define _LOAD_ATTR_METHOD_LAZY_DICT 419 -#define _LOAD_ATTR_METHOD_NO_DICT 420 -#define _LOAD_ATTR_METHOD_WITH_VALUES 421 -#define _LOAD_ATTR_MODULE 422 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 423 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 424 -#define _LOAD_ATTR_PROPERTY_FRAME 425 -#define _LOAD_ATTR_SLOT 426 -#define _LOAD_ATTR_WITH_HINT 427 +#define _LOAD_ATTR_INSTANCE_VALUE 422 +#define _LOAD_ATTR_METHOD_LAZY_DICT 423 +#define _LOAD_ATTR_METHOD_NO_DICT 424 +#define _LOAD_ATTR_METHOD_WITH_VALUES 425 +#define _LOAD_ATTR_MODULE 426 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 427 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 428 +#define _LOAD_ATTR_PROPERTY_FRAME 429 +#define _LOAD_ATTR_SLOT 430 +#define _LOAD_ATTR_WITH_HINT 431 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 428 +#define _LOAD_BYTECODE 432 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 429 -#define _LOAD_CONST_INLINE_BORROW 430 +#define _LOAD_CONST_INLINE 433 +#define _LOAD_CONST_INLINE_BORROW 434 #define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 431 -#define _LOAD_FAST_0 432 -#define _LOAD_FAST_1 433 -#define _LOAD_FAST_2 434 -#define _LOAD_FAST_3 435 -#define _LOAD_FAST_4 436 -#define _LOAD_FAST_5 437 -#define _LOAD_FAST_6 438 -#define _LOAD_FAST_7 439 +#define _LOAD_FAST 435 +#define _LOAD_FAST_0 436 +#define _LOAD_FAST_1 437 +#define _LOAD_FAST_2 438 +#define _LOAD_FAST_3 439 +#define _LOAD_FAST_4 440 +#define _LOAD_FAST_5 441 +#define _LOAD_FAST_6 442 +#define _LOAD_FAST_7 443 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 440 -#define _LOAD_FAST_BORROW_0 441 -#define _LOAD_FAST_BORROW_1 442 -#define _LOAD_FAST_BORROW_2 443 -#define _LOAD_FAST_BORROW_3 444 -#define _LOAD_FAST_BORROW_4 445 -#define _LOAD_FAST_BORROW_5 446 -#define _LOAD_FAST_BORROW_6 447 -#define _LOAD_FAST_BORROW_7 448 +#define _LOAD_FAST_BORROW 444 +#define _LOAD_FAST_BORROW_0 445 +#define _LOAD_FAST_BORROW_1 446 +#define _LOAD_FAST_BORROW_2 447 +#define _LOAD_FAST_BORROW_3 448 +#define _LOAD_FAST_BORROW_4 449 +#define _LOAD_FAST_BORROW_5 450 +#define _LOAD_FAST_BORROW_6 451 +#define _LOAD_FAST_BORROW_7 452 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 449 -#define _LOAD_GLOBAL_BUILTINS 450 -#define _LOAD_GLOBAL_MODULE 451 +#define _LOAD_GLOBAL 453 +#define _LOAD_GLOBAL_BUILTINS 454 +#define _LOAD_GLOBAL_MODULE 455 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 452 -#define _LOAD_SMALL_INT_0 453 -#define _LOAD_SMALL_INT_1 454 -#define _LOAD_SMALL_INT_2 455 -#define _LOAD_SMALL_INT_3 456 +#define _LOAD_SMALL_INT 456 +#define _LOAD_SMALL_INT_0 457 +#define _LOAD_SMALL_INT_1 458 +#define _LOAD_SMALL_INT_2 459 +#define _LOAD_SMALL_INT_3 460 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 457 +#define _MAKE_CALLARGS_A_TUPLE 461 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 458 +#define _MAKE_WARM 462 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 459 -#define _MAYBE_EXPAND_METHOD_KW 460 -#define _MONITOR_CALL 461 -#define _MONITOR_CALL_KW 462 -#define _MONITOR_JUMP_BACKWARD 463 -#define _MONITOR_RESUME 464 +#define _MAYBE_EXPAND_METHOD 463 +#define _MAYBE_EXPAND_METHOD_KW 464 +#define _MONITOR_CALL 465 +#define _MONITOR_CALL_KW 466 +#define _MONITOR_JUMP_BACKWARD 467 +#define _MONITOR_RESUME 468 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 465 -#define _POP_JUMP_IF_TRUE 466 +#define _POP_JUMP_IF_FALSE 469 +#define _POP_JUMP_IF_TRUE 470 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 467 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 468 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 469 +#define _POP_TOP_LOAD_CONST_INLINE 471 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 472 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 473 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 470 +#define _PUSH_FRAME 474 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 471 -#define _PY_FRAME_GENERAL 472 -#define _PY_FRAME_KW 473 -#define _QUICKEN_RESUME 474 -#define _REPLACE_WITH_TRUE 475 +#define _PUSH_NULL_CONDITIONAL 475 +#define _PY_FRAME_GENERAL 476 +#define _PY_FRAME_KW 477 +#define _QUICKEN_RESUME 478 +#define _REPLACE_WITH_TRUE 479 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 476 -#define _SEND 477 -#define _SEND_GEN_FRAME 478 +#define _SAVE_RETURN_OFFSET 480 +#define _SEND 481 +#define _SEND_GEN_FRAME 482 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 479 -#define _STORE_ATTR 480 -#define _STORE_ATTR_INSTANCE_VALUE 481 -#define _STORE_ATTR_SLOT 482 -#define _STORE_ATTR_WITH_HINT 483 +#define _START_EXECUTOR 483 +#define _STORE_ATTR 484 +#define _STORE_ATTR_INSTANCE_VALUE 485 +#define _STORE_ATTR_SLOT 486 +#define _STORE_ATTR_WITH_HINT 487 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 484 -#define _STORE_FAST_0 485 -#define _STORE_FAST_1 486 -#define _STORE_FAST_2 487 -#define _STORE_FAST_3 488 -#define _STORE_FAST_4 489 -#define _STORE_FAST_5 490 -#define _STORE_FAST_6 491 -#define _STORE_FAST_7 492 +#define _STORE_FAST 488 +#define _STORE_FAST_0 489 +#define _STORE_FAST_1 490 +#define _STORE_FAST_2 491 +#define _STORE_FAST_3 492 +#define _STORE_FAST_4 493 +#define _STORE_FAST_5 494 +#define _STORE_FAST_6 495 +#define _STORE_FAST_7 496 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 493 -#define _STORE_SUBSCR 494 -#define _STORE_SUBSCR_DICT 495 -#define _STORE_SUBSCR_LIST_INT 496 +#define _STORE_SLICE 497 +#define _STORE_SUBSCR 498 +#define _STORE_SUBSCR_DICT 499 +#define _STORE_SUBSCR_LIST_INT 500 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 497 -#define _TO_BOOL 498 +#define _TIER2_RESUME_CHECK 501 +#define _TO_BOOL 502 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 499 +#define _TO_BOOL_LIST 503 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 500 +#define _TO_BOOL_STR 504 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 501 -#define _UNPACK_SEQUENCE_LIST 502 -#define _UNPACK_SEQUENCE_TUPLE 503 -#define _UNPACK_SEQUENCE_TWO_TUPLE 504 +#define _UNPACK_SEQUENCE 505 +#define _UNPACK_SEQUENCE_LIST 506 +#define _UNPACK_SEQUENCE_TUPLE 507 +#define _UNPACK_SEQUENCE_TWO_TUPLE 508 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 504 +#define MAX_UOP_ID 508 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 874756770c1871..3d02360d0ae924 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -247,8 +247,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG, + [_CALL_BUILTIN_O_0] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_BUILTIN_O_1] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_CALL_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALL_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CALL_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_LEN] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_ISINSTANCE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, @@ -305,6 +309,7 @@ const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, + [_CALL_BUILTIN_O] = 2, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -335,6 +340,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CALL_BUILTIN_FAST] = "_CALL_BUILTIN_FAST", [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = "_CALL_BUILTIN_FAST_WITH_KEYWORDS", [_CALL_BUILTIN_O] = "_CALL_BUILTIN_O", + [_CALL_BUILTIN_O_0] = "_CALL_BUILTIN_O_0", + [_CALL_BUILTIN_O_1] = "_CALL_BUILTIN_O_1", [_CALL_INTRINSIC_1] = "_CALL_INTRINSIC_1", [_CALL_INTRINSIC_2] = "_CALL_INTRINSIC_2", [_CALL_ISINSTANCE] = "_CALL_ISINSTANCE", @@ -410,6 +417,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GET_LEN] = "_GET_LEN", [_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER", [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND", + [_GUARD_CALL_BUILTIN_FAST] = "_GUARD_CALL_BUILTIN_FAST", + [_GUARD_CALL_BUILTIN_O] = "_GUARD_CALL_BUILTIN_O", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", @@ -1046,8 +1055,16 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _CALL_BUILTIN_CLASS: return 2 + oparg; + case _GUARD_CALL_BUILTIN_O: + return 0; + case _CALL_BUILTIN_O_0: + return 2 + oparg; + case _CALL_BUILTIN_O_1: + return 2 + oparg; case _CALL_BUILTIN_O: return 2 + oparg; + case _GUARD_CALL_BUILTIN_FAST: + return 0; case _CALL_BUILTIN_FAST: return 2 + oparg; case _CALL_BUILTIN_FAST_WITH_KEYWORDS: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 34b7c5982245c7..6612d0aa776c1f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -625,24 +625,9 @@ def dummy(x): def test_promote_globals_to_constants(self): - result = script_helper.run_python_until_end('-c', textwrap.dedent(""" - import _testinternalcapi - import opcode - import _opcode - - def get_first_executor(func): - code = func.__code__ - co_code = code.co_code - for i in range(0, len(co_code), 2): - try: - return _opcode.get_executor(code, i) - except ValueError: - pass - return None - - def get_opnames(ex): - return {item[0] for item in ex} - + result = script_helper.run_python_until_end('-c', textwrap.dedent(f""" + {EXTERNAL_SETUP} + def testfunc(n): for i in range(n): x = range(i) @@ -1767,6 +1752,67 @@ def testfunc(n): self.assertNotIn("_GUARD_TOS_UNICODE", uops) self.assertIn("_BINARY_OP_ADD_UNICODE", uops) + def test_constant_promotion_guard_eliminate_call_builtin_o(self): + result = script_helper.run_python_until_end('-c', textwrap.dedent(f""" + import _testcapi + {EXTERNAL_SETUP} + + def testfunc(n): + for _ in range(n): + # The guard for this function should be eliminated. + _testcapi.meth_o("") + + testfunc(_testinternalcapi.TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + assert ex is not None + uops = get_opnames(ex) + assert "_LOAD_GLOBAL_BUILTINS" not in uops + assert "_GUARD_CALL_BUILTIN_O" not in uops + assert "_CALL_BUILTIN_O_1" in uops + """), PYTHON_JIT="1") + self.assertEqual(result[0].rc, 0, result) + + def test_constant_promotion_guard_eliminate_call_builtin_fast(self): + result = script_helper.run_python_until_end('-c', textwrap.dedent(f""" + import _testcapi + {EXTERNAL_SETUP} + + def testfunc(n): + for _ in range(n): + # The guard for this function should be eliminated. + _testcapi.meth_fastcall("") + + testfunc(_testinternalcapi.TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + assert ex is not None + uops = get_opnames(ex) + assert "_LOAD_GLOBAL_BUILTINS" not in uops + assert "_GUARD_CALL_BUILTIN_FAST" not in uops + assert "_CALL_BUILTIN_FAST" in uops + """), PYTHON_JIT="1") + self.assertEqual(result[0].rc, 0, result) + + +EXTERNAL_SETUP = """ + import _testinternalcapi + import opcode + import _opcode + + def get_first_executor(func): + code = func.__code__ + co_code = code.co_code + for i in range(0, len(co_code), 2): + try: + return _opcode.get_executor(code, i) + except ValueError: + pass + return None + + def get_opnames(ex): + return {item[0] for item in ex} +""" def global_identity(x): return x diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2796c3f2e85732..413cf3d19243ab 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4112,23 +4112,31 @@ dummy_func( _CALL_BUILTIN_CLASS + _CHECK_PERIODIC; - op(_CALL_BUILTIN_O, (callable, self_or_null, args[oparg] -- res)) { - /* Builtin METH_O functions */ + op(_GUARD_CALL_BUILTIN_O, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - - int total_args = oparg; - if (!PyStackRef_IsNull(self_or_null)) { - args--; - total_args++; - } + int total_args = oparg + (!PyStackRef_IsNull(self_or_null)); EXIT_IF(total_args != 1); EXIT_IF(!PyCFunction_CheckExact(callable_o)); EXIT_IF(PyCFunction_GET_FLAGS(callable_o) != METH_O); + } + + replicate(2) op(_CALL_BUILTIN_O, (callable, self_or_null, args[oparg] -- res)) { + /* Builtin METH_O functions */ + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + + _PyStackRef arg; + if (oparg == 0) { + assert(!PyStackRef_IsNull(self_or_null)); + arg = self_or_null; + } + else { + arg = args[0]; + } + // CPython promises to check all non-vectorcall function calls. EXIT_IF(_Py_ReachedRecursionLimit(tstate)); STAT_INC(CALL, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); - _PyStackRef arg = args[0]; PyObject *res_o = _PyCFunction_TrampolineCall(cfunc, PyCFunction_GET_SELF(callable_o), PyStackRef_AsPyObjectBorrow(arg)); _Py_LeaveRecursiveCallTstate(tstate); assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); @@ -4144,9 +4152,16 @@ dummy_func( macro(CALL_BUILTIN_O) = unused/1 + unused/2 + + _GUARD_CALL_BUILTIN_O + _CALL_BUILTIN_O + _CHECK_PERIODIC; + op(_GUARD_CALL_BUILTIN_FAST, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + DEOPT_IF(!PyCFunction_CheckExact(callable_o)); + DEOPT_IF(PyCFunction_GET_FLAGS(callable_o) != METH_FASTCALL); + } + op(_CALL_BUILTIN_FAST, (callable, self_or_null, args[oparg] -- res)) { /* Builtin METH_FASTCALL functions, without keywords */ PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); @@ -4157,8 +4172,7 @@ dummy_func( arguments--; total_args++; } - DEOPT_IF(!PyCFunction_CheckExact(callable_o)); - DEOPT_IF(PyCFunction_GET_FLAGS(callable_o) != METH_FASTCALL); + STAT_INC(CALL, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); /* res = func(self, args, nargs) */ @@ -4181,6 +4195,7 @@ dummy_func( macro(CALL_BUILTIN_FAST) = unused/1 + unused/2 + + _GUARD_CALL_BUILTIN_FAST + _CALL_BUILTIN_FAST + _CHECK_PERIODIC; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 122285ba12e6d1..60f097e83a7545 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5414,21 +5414,14 @@ break; } - case _CALL_BUILTIN_O: { - _PyStackRef *args; + case _GUARD_CALL_BUILTIN_O: { _PyStackRef self_or_null; _PyStackRef callable; - _PyStackRef res; oparg = CURRENT_OPARG(); - args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; - if (!PyStackRef_IsNull(self_or_null)) { - args--; - total_args++; - } + int total_args = oparg + (!PyStackRef_IsNull(self_or_null)); if (total_args != 1) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -5441,13 +5434,34 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + break; + } + + case _CALL_BUILTIN_O_0: { + _PyStackRef *args; + _PyStackRef self_or_null; + _PyStackRef callable; + _PyStackRef res; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + _PyStackRef arg; + if (oparg == 0) { + assert(!PyStackRef_IsNull(self_or_null)); + arg = self_or_null; + } + else { + arg = args[0]; + } if (_Py_ReachedRecursionLimit(tstate)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); - _PyStackRef arg = args[0]; _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = _PyCFunction_TrampolineCall(cfunc, PyCFunction_GET_SELF(callable_o), PyStackRef_AsPyObjectBorrow(arg)); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5471,7 +5485,55 @@ break; } - case _CALL_BUILTIN_FAST: { + case _CALL_BUILTIN_O_1: { + _PyStackRef *args; + _PyStackRef self_or_null; + _PyStackRef callable; + _PyStackRef res; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + _PyStackRef arg; + if (oparg == 0) { + assert(!PyStackRef_IsNull(self_or_null)); + arg = self_or_null; + } + else { + arg = args[0]; + } + if (_Py_ReachedRecursionLimit(tstate)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(CALL, hit); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyCFunction_TrampolineCall(cfunc, PyCFunction_GET_SELF(callable_o), PyStackRef_AsPyObjectBorrow(arg)); + stack_pointer = _PyFrame_GetStackPointer(frame); + _Py_LeaveRecursiveCallTstate(tstate); + assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(arg); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (res_o == NULL) { + JUMP_TO_ERROR(); + } + res = PyStackRef_FromPyObjectSteal(res_o); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CALL_BUILTIN_O: { _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; @@ -5481,12 +5543,48 @@ self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; - _PyStackRef *arguments = args; - if (!PyStackRef_IsNull(self_or_null)) { - arguments--; - total_args++; + _PyStackRef arg; + if (oparg == 0) { + assert(!PyStackRef_IsNull(self_or_null)); + arg = self_or_null; + } + else { + arg = args[0]; + } + if (_Py_ReachedRecursionLimit(tstate)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); } + STAT_INC(CALL, hit); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyCFunction_TrampolineCall(cfunc, PyCFunction_GET_SELF(callable_o), PyStackRef_AsPyObjectBorrow(arg)); + stack_pointer = _PyFrame_GetStackPointer(frame); + _Py_LeaveRecursiveCallTstate(tstate); + assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(arg); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (res_o == NULL) { + JUMP_TO_ERROR(); + } + res = PyStackRef_FromPyObjectSteal(res_o); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _GUARD_CALL_BUILTIN_FAST: { + _PyStackRef callable; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); if (!PyCFunction_CheckExact(callable_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -5495,6 +5593,25 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + break; + } + + case _CALL_BUILTIN_FAST: { + _PyStackRef *args; + _PyStackRef self_or_null; + _PyStackRef callable; + _PyStackRef res; + oparg = CURRENT_OPARG(); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + int total_args = oparg; + _PyStackRef *arguments = args; + if (!PyStackRef_IsNull(self_or_null)) { + arguments--; + total_args++; + } STAT_INC(CALL, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index cc85405f80b4c0..6812cfbd5e823e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2034,18 +2034,10 @@ _PyStackRef res; /* Skip 1 cache entry */ /* Skip 2 cache entries */ - // _CALL_BUILTIN_FAST + // _GUARD_CALL_BUILTIN_FAST { - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; - _PyStackRef *arguments = args; - if (!PyStackRef_IsNull(self_or_null)) { - arguments--; - total_args++; - } if (!PyCFunction_CheckExact(callable_o)) { UPDATE_MISS_STATS(CALL); assert(_PyOpcode_Deopt[opcode] == (CALL)); @@ -2056,6 +2048,18 @@ assert(_PyOpcode_Deopt[opcode] == (CALL)); JUMP_TO_PREDICTED(CALL); } + } + // _CALL_BUILTIN_FAST + { + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + int total_args = oparg; + _PyStackRef *arguments = args; + if (!PyStackRef_IsNull(self_or_null)) { + arguments--; + total_args++; + } STAT_INC(CALL, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); @@ -2268,17 +2272,12 @@ _PyStackRef res; /* Skip 1 cache entry */ /* Skip 2 cache entries */ - // _CALL_BUILTIN_O + // _GUARD_CALL_BUILTIN_O { - args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; - if (!PyStackRef_IsNull(self_or_null)) { - args--; - total_args++; - } + int total_args = oparg + (!PyStackRef_IsNull(self_or_null)); if (total_args != 1) { UPDATE_MISS_STATS(CALL); assert(_PyOpcode_Deopt[opcode] == (CALL)); @@ -2294,6 +2293,19 @@ assert(_PyOpcode_Deopt[opcode] == (CALL)); JUMP_TO_PREDICTED(CALL); } + } + // _CALL_BUILTIN_O + { + args = &stack_pointer[-oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + _PyStackRef arg; + if (oparg == 0) { + assert(!PyStackRef_IsNull(self_or_null)); + arg = self_or_null; + } + else { + arg = args[0]; + } if (_Py_ReachedRecursionLimit(tstate)) { UPDATE_MISS_STATS(CALL); assert(_PyOpcode_Deopt[opcode] == (CALL)); @@ -2301,7 +2313,6 @@ } STAT_INC(CALL, hit); PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); - _PyStackRef arg = args[0]; _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = _PyCFunction_TrampolineCall(cfunc, PyCFunction_GET_SELF(callable_o), PyStackRef_AsPyObjectBorrow(arg)); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c5d8b536bc6341..1a0f111963941f 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -686,6 +686,32 @@ dummy_func(void) { } } + op(_GUARD_CALL_BUILTIN_O, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + int total_args = oparg; + if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + total_args += sym_is_not_null(self_or_null); + // Constant propagate + if (total_args == 1 && sym_is_const(ctx, callable)) { + PyObject *callable_o = sym_get_const(ctx, callable); + if (PyCFunction_CheckExact(callable_o) && + PyCFunction_GET_FLAGS(callable_o) == METH_O) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + } + } + } + + op(_GUARD_CALL_BUILTIN_FAST, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + if (sym_is_const(ctx, callable)) { + PyObject *callable_o = sym_get_const(ctx, callable); + // Constant propagate + if (PyCFunction_CheckExact(callable_o) && + PyCFunction_GET_FLAGS(callable_o) == METH_FASTCALL) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + } + } + op(_MAYBE_EXPAND_METHOD, (callable, self_or_null, args[oparg] -- func, maybe_self, args[oparg])) { (void)args; func = sym_new_not_null(ctx); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 828f0943a8db86..f6a7e6c9af60a4 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1915,6 +1915,25 @@ break; } + case _GUARD_CALL_BUILTIN_O: { + JitOptSymbol *self_or_null; + JitOptSymbol *callable; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int total_args = oparg; + if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) { + total_args += sym_is_not_null(self_or_null); + if (total_args == 1 && sym_is_const(ctx, callable)) { + PyObject *callable_o = sym_get_const(ctx, callable); + if (PyCFunction_CheckExact(callable_o) && + PyCFunction_GET_FLAGS(callable_o) == METH_O) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + } + } + break; + } + case _CALL_BUILTIN_O: { JitOptSymbol *res; res = sym_new_not_null(ctx); @@ -1924,6 +1943,19 @@ break; } + case _GUARD_CALL_BUILTIN_FAST: { + JitOptSymbol *callable; + callable = stack_pointer[-2 - oparg]; + if (sym_is_const(ctx, callable)) { + PyObject *callable_o = sym_get_const(ctx, callable); + if (PyCFunction_CheckExact(callable_o) && + PyCFunction_GET_FLAGS(callable_o) == METH_FASTCALL) { + REPLACE_OP(this_instr, _NOP, 0 ,0); + } + } + break; + } + case _CALL_BUILTIN_FAST: { JitOptSymbol *res; res = sym_new_not_null(ctx); From e357d1367bca672645f1cccccf34fcc74335ba80 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 19 Apr 2025 05:06:24 +0800 Subject: [PATCH 2/2] lint --- Lib/test/test_capi/test_opt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 6612d0aa776c1f..2196fce26b769b 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -627,7 +627,7 @@ def test_promote_globals_to_constants(self): result = script_helper.run_python_until_end('-c', textwrap.dedent(f""" {EXTERNAL_SETUP} - + def testfunc(n): for i in range(n): x = range(i) @@ -1799,7 +1799,7 @@ def testfunc(n): import _testinternalcapi import opcode import _opcode - + def get_first_executor(func): code = func.__code__ co_code = code.co_code @@ -1809,7 +1809,7 @@ def get_first_executor(func): except ValueError: pass return None - + def get_opnames(ex): return {item[0] for item in ex} """