From 317db3a6760c785d931174e81dfe0179f5b4d9c1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 5 Nov 2023 10:18:10 +0000 Subject: [PATCH 1/5] Replace jumps with deopts in tier 2 --- Include/internal/pycore_opcode_metadata.h | 140 +++++++++++++--------- Python/abstract_interp_cases.c.h | 14 ++- Python/bytecodes.c | 32 +++-- Python/executor_cases.c.h | 28 +++-- Python/generated_cases.c.h | 16 +-- Python/optimizer.c | 46 +++---- 6 files changed, 164 insertions(+), 112 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index bd5a401adf11bf..b87fddbd2142b6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -76,45 +76,49 @@ #define _STORE_ATTR_SLOT 348 #define _SPECIALIZE_COMPARE_OP 349 #define _COMPARE_OP 350 -#define _IS_NONE 351 -#define _SPECIALIZE_FOR_ITER 352 -#define _FOR_ITER 353 -#define _ITER_CHECK_LIST 354 -#define _ITER_JUMP_LIST 355 -#define _GUARD_NOT_EXHAUSTED_LIST 356 -#define _ITER_NEXT_LIST 357 -#define _ITER_CHECK_TUPLE 358 -#define _ITER_JUMP_TUPLE 359 -#define _GUARD_NOT_EXHAUSTED_TUPLE 360 -#define _ITER_NEXT_TUPLE 361 -#define _ITER_CHECK_RANGE 362 -#define _ITER_JUMP_RANGE 363 -#define _GUARD_NOT_EXHAUSTED_RANGE 364 -#define _ITER_NEXT_RANGE 365 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 366 -#define _GUARD_KEYS_VERSION 367 -#define _LOAD_ATTR_METHOD_WITH_VALUES 368 -#define _LOAD_ATTR_METHOD_NO_DICT 369 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 370 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 371 -#define _CHECK_ATTR_METHOD_LAZY_DICT 372 -#define _LOAD_ATTR_METHOD_LAZY_DICT 373 -#define _SPECIALIZE_CALL 374 -#define _CALL 375 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 376 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 377 -#define _CHECK_PEP_523 378 -#define _CHECK_FUNCTION_EXACT_ARGS 379 -#define _CHECK_STACK_SPACE 380 -#define _INIT_CALL_PY_EXACT_ARGS 381 -#define _PUSH_FRAME 382 -#define _SPECIALIZE_BINARY_OP 383 -#define _BINARY_OP 384 -#define _POP_JUMP_IF_FALSE 385 -#define _POP_JUMP_IF_TRUE 386 -#define _JUMP_TO_TOP 387 -#define _SAVE_RETURN_OFFSET 388 -#define _INSERT 389 +#define _POP_JUMP_IF_FALSE 351 +#define _POP_JUMP_IF_TRUE 352 +#define _IS_NONE 353 +#define _SPECIALIZE_FOR_ITER 354 +#define _FOR_ITER 355 +#define _ITER_CHECK_LIST 356 +#define _ITER_JUMP_LIST 357 +#define _GUARD_NOT_EXHAUSTED_LIST 358 +#define _ITER_NEXT_LIST 359 +#define _ITER_CHECK_TUPLE 360 +#define _ITER_JUMP_TUPLE 361 +#define _GUARD_NOT_EXHAUSTED_TUPLE 362 +#define _ITER_NEXT_TUPLE 363 +#define _ITER_CHECK_RANGE 364 +#define _ITER_JUMP_RANGE 365 +#define _GUARD_NOT_EXHAUSTED_RANGE 366 +#define _ITER_NEXT_RANGE 367 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 +#define _GUARD_KEYS_VERSION 369 +#define _LOAD_ATTR_METHOD_WITH_VALUES 370 +#define _LOAD_ATTR_METHOD_NO_DICT 371 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 +#define _CHECK_ATTR_METHOD_LAZY_DICT 374 +#define _LOAD_ATTR_METHOD_LAZY_DICT 375 +#define _SPECIALIZE_CALL 376 +#define _CALL 377 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _CHECK_PEP_523 380 +#define _CHECK_FUNCTION_EXACT_ARGS 381 +#define _CHECK_STACK_SPACE 382 +#define _INIT_CALL_PY_EXACT_ARGS 383 +#define _PUSH_FRAME 384 +#define _SPECIALIZE_BINARY_OP 385 +#define _BINARY_OP 386 +#define _GUARD_IS_TRUE_POP 387 +#define _GUARD_IS_FALSE_POP 388 +#define _GUARD_IS_NONE_POP 389 +#define _GUARD_IS_NOT_NONE_POP 390 +#define _JUMP_TO_TOP 391 +#define _SAVE_RETURN_OFFSET 392 +#define _INSERT 393 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -504,12 +508,16 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case ENTER_EXECUTOR: return 0; - case POP_JUMP_IF_FALSE: + case _POP_JUMP_IF_FALSE: return 1; - case POP_JUMP_IF_TRUE: + case _POP_JUMP_IF_TRUE: return 1; case _IS_NONE: return 1; + case POP_JUMP_IF_TRUE: + return 1; + case POP_JUMP_IF_FALSE: + return 1; case POP_JUMP_IF_NONE: return 1; case POP_JUMP_IF_NOT_NONE: @@ -724,9 +732,13 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; - case _POP_JUMP_IF_FALSE: + case _GUARD_IS_TRUE_POP: return 1; - case _POP_JUMP_IF_TRUE: + case _GUARD_IS_FALSE_POP: + return 1; + case _GUARD_IS_NONE_POP: + return 1; + case _GUARD_IS_NOT_NONE_POP: return 1; case _JUMP_TO_TOP: return 0; @@ -1132,12 +1144,16 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case ENTER_EXECUTOR: return 0; - case POP_JUMP_IF_FALSE: + case _POP_JUMP_IF_FALSE: return 0; - case POP_JUMP_IF_TRUE: + case _POP_JUMP_IF_TRUE: return 0; case _IS_NONE: return 1; + case POP_JUMP_IF_TRUE: + return 0; + case POP_JUMP_IF_FALSE: + return 0; case POP_JUMP_IF_NONE: return 0; case POP_JUMP_IF_NOT_NONE: @@ -1352,9 +1368,13 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case RESERVED: return 0; - case _POP_JUMP_IF_FALSE: + case _GUARD_IS_TRUE_POP: return 0; - case _POP_JUMP_IF_TRUE: + case _GUARD_IS_FALSE_POP: + return 0; + case _GUARD_IS_NONE_POP: + return 0; + case _GUARD_IS_NOT_NONE_POP: return 0; case _JUMP_TO_TOP: return 0; @@ -1632,9 +1652,11 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [JUMP] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_NO_INTERRUPT] = { true, 0, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG }, - [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [_IS_NONE] = { true, INSTR_FMT_IX, 0 }, + [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1742,8 +1764,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, [RESERVED] = { true, INSTR_FMT_IX, 0 }, - [_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [_GUARD_IS_TRUE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_FALSE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [_GUARD_IS_NOT_NONE_POP] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1863,6 +1887,10 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [CONTAINS_OP] = { .nuops = 1, .uops = { { CONTAINS_OP, 0, 0 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { CHECK_EG_MATCH, 0, 0 } } }, [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { CHECK_EXC_MATCH, 0, 0 } } }, + [POP_JUMP_IF_TRUE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_TRUE, 0, 0 } } }, + [POP_JUMP_IF_FALSE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_FALSE, 0, 0 } } }, + [POP_JUMP_IF_NONE] = { .nuops = 2, .uops = { { _IS_NONE, 0, 0 }, { _POP_JUMP_IF_TRUE, 0, 0 } } }, + [POP_JUMP_IF_NOT_NONE] = { .nuops = 2, .uops = { { _IS_NONE, 0, 0 }, { _POP_JUMP_IF_FALSE, 0, 0 } } }, [GET_LEN] = { .nuops = 1, .uops = { { GET_LEN, 0, 0 } } }, [MATCH_CLASS] = { .nuops = 1, .uops = { { MATCH_CLASS, 0, 0 } } }, [MATCH_MAPPING] = { .nuops = 1, .uops = { { MATCH_MAPPING, 0, 0 } } }, @@ -1964,6 +1992,8 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT", [_SPECIALIZE_COMPARE_OP] = "_SPECIALIZE_COMPARE_OP", [_COMPARE_OP] = "_COMPARE_OP", + [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", + [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", @@ -1998,8 +2028,10 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_PUSH_FRAME] = "_PUSH_FRAME", [_SPECIALIZE_BINARY_OP] = "_SPECIALIZE_BINARY_OP", [_BINARY_OP] = "_BINARY_OP", - [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", - [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", + [_GUARD_IS_TRUE_POP] = "_GUARD_IS_TRUE_POP", + [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", + [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", + [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_SAVE_RETURN_OFFSET] = "_SAVE_RETURN_OFFSET", [_INSERT] = "_INSERT", @@ -2247,8 +2279,8 @@ const uint8_t _PyOpcode_Caches[256] = { [LOAD_ATTR] = 9, [COMPARE_OP] = 1, [JUMP_BACKWARD] = 1, - [POP_JUMP_IF_FALSE] = 1, [POP_JUMP_IF_TRUE] = 1, + [POP_JUMP_IF_FALSE] = 1, [POP_JUMP_IF_NONE] = 1, [POP_JUMP_IF_NOT_NONE] = 1, [FOR_ITER] = 1, diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 384a11212b3a2c..0d6f330bb55af3 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -914,12 +914,22 @@ break; } - case _POP_JUMP_IF_FALSE: { + case _GUARD_IS_TRUE_POP: { STACK_SHRINK(1); break; } - case _POP_JUMP_IF_TRUE: { + case _GUARD_IS_FALSE_POP: { + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NONE_POP: { + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NOT_NONE_POP: { STACK_SHRINK(1); break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f879ea5850aede..6a050f45ae2391 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2384,7 +2384,7 @@ dummy_func( goto enter_tier_one; } - inst(POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2393,7 +2393,7 @@ dummy_func( JUMPBY(oparg * flag); } - inst(POP_JUMP_IF_TRUE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_TRUE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsTrue(cond); #if ENABLE_SPECIALIZATION @@ -2412,9 +2412,13 @@ dummy_func( } } - macro(POP_JUMP_IF_NONE) = _IS_NONE + POP_JUMP_IF_TRUE; + macro(POP_JUMP_IF_TRUE) = _POP_JUMP_IF_TRUE; - macro(POP_JUMP_IF_NOT_NONE) = _IS_NONE + POP_JUMP_IF_FALSE; + macro(POP_JUMP_IF_FALSE) = _POP_JUMP_IF_FALSE; + + macro(POP_JUMP_IF_NONE) = _IS_NONE + _POP_JUMP_IF_TRUE; + + macro(POP_JUMP_IF_NOT_NONE) = _IS_NONE + _POP_JUMP_IF_FALSE; inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) { /* This bytecode is used in the `yield from` or `await` loop. @@ -3979,16 +3983,20 @@ dummy_func( ///////// Tier-2 only opcodes ///////// - op(_POP_JUMP_IF_FALSE, (flag -- )) { - if (Py_IsFalse(flag)) { - next_uop = current_executor->trace + oparg; - } + op (_GUARD_IS_TRUE_POP, (flag -- )) { + DEOPT_IF(Py_IsFalse(flag)); } - op(_POP_JUMP_IF_TRUE, (flag -- )) { - if (Py_IsTrue(flag)) { - next_uop = current_executor->trace + oparg; - } + op (_GUARD_IS_FALSE_POP, (flag -- )) { + DEOPT_IF(Py_IsTrue(flag)); + } + + op (_GUARD_IS_NONE_POP, (val -- )) { + DEOPT_IF(!Py_IsNone(val)); + } + + op (_GUARD_IS_NOT_NONE_POP, (val -- )) { + DEOPT_IF(Py_IsNone(val)); } op(_JUMP_TO_TOP, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d94a7cc4be0052..185a3239520eda 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3215,22 +3215,34 @@ break; } - case _POP_JUMP_IF_FALSE: { + case _GUARD_IS_TRUE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsFalse(flag)) { - next_uop = current_executor->trace + oparg; - } + DEOPT_IF(Py_IsFalse(flag), _GUARD_IS_TRUE_POP); STACK_SHRINK(1); break; } - case _POP_JUMP_IF_TRUE: { + case _GUARD_IS_FALSE_POP: { PyObject *flag; flag = stack_pointer[-1]; - if (Py_IsTrue(flag)) { - next_uop = current_executor->trace + oparg; - } + DEOPT_IF(Py_IsTrue(flag), _GUARD_IS_FALSE_POP); + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NONE_POP: { + PyObject *val; + val = stack_pointer[-1]; + DEOPT_IF(!Py_IsNone(val), _GUARD_IS_NONE_POP); + STACK_SHRINK(1); + break; + } + + case _GUARD_IS_NOT_NONE_POP: { + PyObject *val; + val = stack_pointer[-1]; + DEOPT_IF(Py_IsNone(val), _GUARD_IS_NOT_NONE_POP); STACK_SHRINK(1); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1c853047a260a0..6d779864954aa7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3452,14 +3452,14 @@ goto enter_tier_one; } - TARGET(POP_JUMP_IF_FALSE) { + TARGET(POP_JUMP_IF_TRUE) { _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; next_instr += 2; - INSTRUCTION_STATS(POP_JUMP_IF_FALSE); + INSTRUCTION_STATS(POP_JUMP_IF_TRUE); PyObject *cond; cond = stack_pointer[-1]; assert(PyBool_Check(cond)); - int flag = Py_IsFalse(cond); + int flag = Py_IsTrue(cond); #if ENABLE_SPECIALIZATION this_instr[1].cache = (this_instr[1].cache << 1) | flag; #endif @@ -3468,14 +3468,14 @@ DISPATCH(); } - TARGET(POP_JUMP_IF_TRUE) { + TARGET(POP_JUMP_IF_FALSE) { _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; next_instr += 2; - INSTRUCTION_STATS(POP_JUMP_IF_TRUE); + INSTRUCTION_STATS(POP_JUMP_IF_FALSE); PyObject *cond; cond = stack_pointer[-1]; assert(PyBool_Check(cond)); - int flag = Py_IsTrue(cond); + int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION this_instr[1].cache = (this_instr[1].cache << 1) | flag; #endif @@ -3502,7 +3502,7 @@ Py_DECREF(value); } } - // POP_JUMP_IF_TRUE + // _POP_JUMP_IF_TRUE cond = b; { assert(PyBool_Check(cond)); @@ -3534,7 +3534,7 @@ Py_DECREF(value); } } - // POP_JUMP_IF_FALSE + // _POP_JUMP_IF_FALSE cond = b; { assert(PyBool_Check(cond)); diff --git a/Python/optimizer.c b/Python/optimizer.c index 065e1274671993..a866ea75e502d5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,18 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, }; +static const uint16_t +BRANCH_TO_GUARDS[4][2] = { + [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP, + [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_FALSE_POP, + [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_FALSE_POP, + [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_TRUE_POP, + [POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NOT_NONE_POP, + [POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NONE_POP, + [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NONE_POP, + [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, +}; + #define TRACE_STACK_SIZE 5 /* Returns 1 on success, @@ -526,45 +538,23 @@ translate_bytecode_to_trace( } switch (opcode) { - case POP_JUMP_IF_NONE: - { - RESERVE(2, 2); - ADD_TO_TRACE(_IS_NONE, 0, 0); - opcode = POP_JUMP_IF_TRUE; - goto pop_jump_if_bool; - } - case POP_JUMP_IF_NOT_NONE: - { - RESERVE(2, 2); - ADD_TO_TRACE(_IS_NONE, 0, 0); - opcode = POP_JUMP_IF_FALSE; - goto pop_jump_if_bool; - } - case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: { -pop_jump_if_bool: - RESERVE(1, 2); - max_length -= 2; // Really the start of the stubs + RESERVE(1, 0); int counter = instr[1].cache; int bitcount = _Py_popcount32(counter); - bool jump_likely = bitcount > 8; - bool jump_sense = opcode == POP_JUMP_IF_TRUE; - uint32_t uopcode = jump_sense ^ jump_likely ? - _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE; + int jump_likely = bitcount > 8; + uint32_t uopcode = BRANCH_TO_GUARDS[opcode - POP_JUMP_IF_FALSE][jump_likely]; _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; - _Py_CODEUNIT *target_instr = next_instr + oparg; - _Py_CODEUNIT *stub_target = jump_likely ? next_instr : target_instr; - DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, sense=%d, uopcode=%s\n", + DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", uop_name(opcode), oparg, - counter, bitcount, jump_likely, jump_sense, uop_name(uopcode)); + counter, bitcount, jump_likely, uop_name(uopcode)); ADD_TO_TRACE(uopcode, max_length, 0); - ADD_TO_STUB(max_length, _SET_IP, INSTR_IP(stub_target, code), 0); - ADD_TO_STUB(max_length + 1, _EXIT_TRACE, 0, 0); if (jump_likely) { + _Py_CODEUNIT *target_instr = next_instr + oparg; DPRINTF(2, "Jump likely (%x = %d bits), continue at byte offset %d\n", instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code)); instr = target_instr; From 2db61ccc925c329710cef63cdd09197482fee210 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 11 Nov 2023 11:17:50 +0000 Subject: [PATCH 2/5] Fewer special cases of uop names --- Python/optimizer.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index a866ea75e502d5..44106b5b87ecc2 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -384,7 +384,7 @@ PyTypeObject _PyUOpExecutor_Type = { .tp_methods = executor_methods, }; -/* TO DO -- Generate this table */ +/* TO DO -- Generate these tables */ static const uint16_t _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, @@ -764,6 +764,16 @@ translate_bytecode_to_trace( #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31))) #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) +static bool +is_branch(opcode) { + /* Currently there are no jumps in the buffer, + * but we expect the optimizer to add them + * in the future. */ + assert(opcode != _POP_JUMP_IF_FALSE && + opcode != _POP_JUMP_IF_TRUE); + return false; +} + /* Count the number of used uops, and mark them in the bit vector `used`. * This can be done in a single pass using simple reachability analysis, * as there are no backward jumps. @@ -785,16 +795,13 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) } /* All other micro-ops fall through, so i+1 is reachable */ SET_BIT(used, i+1); - switch(opcode) { - case NOP: - /* Don't count NOPs as used */ - count--; - UNSET_BIT(used, i); - break; - case _POP_JUMP_IF_FALSE: - case _POP_JUMP_IF_TRUE: - /* Mark target as reachable */ - SET_BIT(used, buffer[i].oparg); + if (is_branch(opcode)) { + /* Mark target as reachable */ + SET_BIT(used, buffer[i].oparg); + } + if (opcode == NOP) { + count--; + UNSET_BIT(used, i); } } return count; From f36132831dc1d8696436ce107d6003c6514cd17d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 11 Nov 2023 11:59:00 +0000 Subject: [PATCH 3/5] Fix refleak --- Python/bytecodes.c | 14 ++++++++++++-- Python/executor_cases.c.h | 14 ++++++++++++-- Python/optimizer.c | 2 +- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6a050f45ae2391..b7ddfda014aa17 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3985,18 +3985,28 @@ dummy_func( op (_GUARD_IS_TRUE_POP, (flag -- )) { DEOPT_IF(Py_IsFalse(flag)); + assert(Py_IsTrue(flag)); } op (_GUARD_IS_FALSE_POP, (flag -- )) { DEOPT_IF(Py_IsTrue(flag)); + assert(Py_IsFalse(flag)); } op (_GUARD_IS_NONE_POP, (val -- )) { - DEOPT_IF(!Py_IsNone(val)); + if (!Py_IsNone(val)) { + Py_DECREF(val); + DEOPT_IF(true); + } } op (_GUARD_IS_NOT_NONE_POP, (val -- )) { - DEOPT_IF(Py_IsNone(val)); + if (Py_IsNone(val)) { + DEOPT_IF(true); + } + else { + Py_DECREF(val); + } } op(_JUMP_TO_TOP, (--)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 185a3239520eda..46cdb2b185398b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3219,6 +3219,7 @@ PyObject *flag; flag = stack_pointer[-1]; DEOPT_IF(Py_IsFalse(flag), _GUARD_IS_TRUE_POP); + assert(Py_IsTrue(flag)); STACK_SHRINK(1); break; } @@ -3227,6 +3228,7 @@ PyObject *flag; flag = stack_pointer[-1]; DEOPT_IF(Py_IsTrue(flag), _GUARD_IS_FALSE_POP); + assert(Py_IsFalse(flag)); STACK_SHRINK(1); break; } @@ -3234,7 +3236,10 @@ case _GUARD_IS_NONE_POP: { PyObject *val; val = stack_pointer[-1]; - DEOPT_IF(!Py_IsNone(val), _GUARD_IS_NONE_POP); + if (!Py_IsNone(val)) { + Py_DECREF(val); + DEOPT_IF(true, _GUARD_IS_NONE_POP); + } STACK_SHRINK(1); break; } @@ -3242,7 +3247,12 @@ case _GUARD_IS_NOT_NONE_POP: { PyObject *val; val = stack_pointer[-1]; - DEOPT_IF(Py_IsNone(val), _GUARD_IS_NOT_NONE_POP); + if (Py_IsNone(val)) { + DEOPT_IF(true, _GUARD_IS_NOT_NONE_POP); + } + else { + Py_DECREF(val); + } STACK_SHRINK(1); break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 44106b5b87ecc2..90c2eeb828e72b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -765,7 +765,7 @@ translate_bytecode_to_trace( #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) static bool -is_branch(opcode) { +is_branch(int opcode) { /* Currently there are no jumps in the buffer, * but we expect the optimizer to add them * in the future. */ From 69cee8daae30c8363bee399641fc61130e3eab35 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 11 Nov 2023 15:20:50 +0000 Subject: [PATCH 4/5] Update test --- Lib/test/test_capi/test_misc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index d526bbf62dda0b..fe5c36c0c0dec9 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2610,7 +2610,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_FALSE", uops) + self.assertIn("_GUARD_IS_TRUE_POP", uops) def test_pop_jump_if_none(self): def testfunc(a): @@ -2625,7 +2625,7 @@ def testfunc(a): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_TRUE", uops) + self.assertIn("_GUARD_IS_NOT_NONE_POP", uops) def test_pop_jump_if_not_none(self): def testfunc(a): @@ -2641,7 +2641,7 @@ def testfunc(a): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_FALSE", uops) + self.assertIn("_GUARD_IS_NONE_POP", uops) def test_pop_jump_if_true(self): def testfunc(n): @@ -2656,7 +2656,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_TRUE", uops) + self.assertIn("_GUARD_IS_FALSE_POP", uops) def test_jump_backward(self): def testfunc(n): @@ -2806,7 +2806,7 @@ def testfunc(n): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = {opname for opname, _, _ in ex} - self.assertIn("_POP_JUMP_IF_TRUE", uops) + self.assertIn("_GUARD_IS_FALSE_POP", uops) if __name__ == "__main__": From 68d1b276894d3b1e1ea8beb3abd8c4717c56fd7a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 12 Nov 2023 05:42:59 +0000 Subject: [PATCH 5/5] Address review comments --- Python/optimizer.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 1b94fa2c3e757a..bc518d0088b373 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -394,7 +394,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { }; static const uint16_t -BRANCH_TO_GUARDS[4][2] = { +BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP, [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_FALSE_POP, [POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_FALSE_POP, @@ -549,7 +549,7 @@ translate_bytecode_to_trace( int counter = instr[1].cache; int bitcount = _Py_popcount32(counter); int jump_likely = bitcount > 8; - uint32_t uopcode = BRANCH_TO_GUARDS[opcode - POP_JUMP_IF_FALSE][jump_likely]; + uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", uop_name(opcode), oparg, @@ -766,16 +766,6 @@ translate_bytecode_to_trace( #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31))) #define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) -static bool -is_branch(int opcode) { - /* Currently there are no jumps in the buffer, - * but we expect the optimizer to add them - * in the future. */ - assert(opcode != _POP_JUMP_IF_FALSE && - opcode != _POP_JUMP_IF_TRUE); - return false; -} - /* Count the number of used uops, and mark them in the bit vector `used`. * This can be done in a single pass using simple reachability analysis, * as there are no backward jumps. @@ -797,7 +787,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used) } /* All other micro-ops fall through, so i+1 is reachable */ SET_BIT(used, i+1); - if (is_branch(opcode)) { + if (OPCODE_HAS_JUMP(opcode)) { /* Mark target as reachable */ SET_BIT(used, buffer[i].oparg); }