diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 0dfd2a251bd6d5..3ec0a86341483a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1376,7 +1376,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_PY_EXACT_ARGS] = { .nuops = 8, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_PY_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_STR_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, - [CALL_TUPLE_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, + [CALL_TUPLE_1] = { .nuops = 5, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_TYPE_1] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, OPARG_SIMPLE, 0 } } }, [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { _CHECK_EXC_MATCH, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 553636cbb5914a..08e517dea326e8 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -439,7 +439,7 @@ extern "C" { #define _CALL_METHOD_DESCRIPTOR_O_r01 632 #define _CALL_NON_PY_GENERAL_r01 633 #define _CALL_STR_1_r31 634 -#define _CALL_TUPLE_1_r31 635 +#define _CALL_TUPLE_1_r32 635 #define _CALL_TYPE_1_r31 636 #define _CHECK_AND_ALLOCATE_OBJECT_r00 637 #define _CHECK_ATTR_CLASS_r01 638 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 436defbb8df570..9ca53329e5d959 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -2543,7 +2543,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 0, _CALL_TUPLE_1_r31 }, + { 2, 0, _CALL_TUPLE_1_r32 }, }, }, [_CHECK_AND_ALLOCATE_OBJECT] = { @@ -3739,7 +3739,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_GUARD_CALLABLE_TUPLE_1_r13] = _GUARD_CALLABLE_TUPLE_1, [_GUARD_CALLABLE_TUPLE_1_r23] = _GUARD_CALLABLE_TUPLE_1, [_GUARD_CALLABLE_TUPLE_1_r33] = _GUARD_CALLABLE_TUPLE_1, - [_CALL_TUPLE_1_r31] = _CALL_TUPLE_1, + [_CALL_TUPLE_1_r32] = _CALL_TUPLE_1, [_CHECK_AND_ALLOCATE_OBJECT_r00] = _CHECK_AND_ALLOCATE_OBJECT, [_CREATE_INIT_FRAME_r01] = _CREATE_INIT_FRAME, [_EXIT_INIT_CHECK_r10] = _EXIT_INIT_CHECK, @@ -4058,7 +4058,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_CALL_STR_1] = "_CALL_STR_1", [_CALL_STR_1_r31] = "_CALL_STR_1_r31", [_CALL_TUPLE_1] = "_CALL_TUPLE_1", - [_CALL_TUPLE_1_r31] = "_CALL_TUPLE_1_r31", + [_CALL_TUPLE_1_r32] = "_CALL_TUPLE_1_r32", [_CALL_TYPE_1] = "_CALL_TYPE_1", [_CALL_TYPE_1_r31] = "_CALL_TYPE_1_r31", [_CHECK_AND_ALLOCATE_OBJECT] = "_CHECK_AND_ALLOCATE_OBJECT", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2e7485e89fdac5..f16055ab84de08 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1925,6 +1925,21 @@ def testfunc(n): uops = get_opnames(ex) self.assertNotIn("_GUARD_IS_NOT_NONE_POP", uops) + def test_call_tuple_1_pop_top(self): + def testfunc(n): + x = 0 + for _ in range(n): + t = tuple(()) + x += len(t) == 0 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_TUPLE_1", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_call_str_1(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst new file mode 100644 index 00000000000000..34df14750464ce --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``_CALL_TUPLE_1``. Patch by Noam Cohen diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f64a7778121c71..3cc6ff842c9202 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4068,17 +4068,14 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); } - op(_CALL_TUPLE_1, (callable, null, arg -- res)) { + op(_CALL_TUPLE_1, (callable, null, arg -- res, a)) { PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); PyObject *res_o = PySequence_Tuple(arg_o); - DEAD(null); - DEAD(callable); - (void)callable; // Silence compiler warnings about unused variables - (void)null; - PyStackRef_CLOSE(arg); + a = arg; + INPUTS_DEAD(); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4089,6 +4086,7 @@ dummy_func( _GUARD_NOS_NULL + _GUARD_CALLABLE_TUPLE_1 + _CALL_TUPLE_1 + + POP_TOP + _CHECK_PERIODIC_AT_END; op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { @@ -5455,6 +5453,12 @@ dummy_func( } } + label(pop_3_error) { + stack_pointer -= 3; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } + label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 24f343c994038b..248cf60b70e455 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12888,47 +12888,42 @@ break; } - case _CALL_TUPLE_1_r31: { + case _CALL_TUPLE_1_r32: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef arg; - _PyStackRef null; - _PyStackRef callable; _PyStackRef res; + _PyStackRef a; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; oparg = CURRENT_OPARG(); arg = _stack_item_2; - null = _stack_item_1; - callable = _stack_item_0; PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - stack_pointer[0] = callable; - stack_pointer[1] = null; + stack_pointer[0] = _stack_item_0; + stack_pointer[1] = _stack_item_1; stack_pointer[2] = arg; stack_pointer += 3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); - stack_pointer = _PyFrame_GetStackPointer(frame); + a = arg; if (res_o == NULL) { + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); + _tos_cache1 = a; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + SET_CURRENT_CACHED_VALUES(2); + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ba80886f6dc0c7..b70b7fde37a72a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4029,6 +4029,8 @@ _PyStackRef callable; _PyStackRef arg; _PyStackRef res; + _PyStackRef a; + _PyStackRef value; /* Skip 1 cache entry */ /* Skip 2 cache entries */ // _GUARD_NOS_NULL @@ -4059,23 +4061,24 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); - stack_pointer = _PyFrame_GetStackPointer(frame); + a = arg; if (res_o == NULL) { - JUMP_TO_LABEL(error); + JUMP_TO_LABEL(pop_3_error); } res = PyStackRef_FromPyObjectSteal(res_o); } - // _CHECK_PERIODIC_AT_END + // _POP_TOP { - stack_pointer[0] = res; - stack_pointer += 1; + value = a; + stack_pointer[-3] = res; + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _CHECK_PERIODIC_AT_END + { _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11849,6 +11852,13 @@ JUMP_TO_LABEL(error); #endif /* _Py_TAIL_CALL_INTERP */ /* BEGIN LABELS */ + LABEL(pop_3_error) + { + stack_pointer -= 3; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_LABEL(error); + } + LABEL(pop_2_error) { stack_pointer -= 2; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index b2fa7d01e8f6c2..1a8a4392187eca 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -522,6 +522,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256]; static py_tail_call_funcptr instruction_funcptr_tracing_table[256]; +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_3_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_2_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_1_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_error(TAIL_CALL_PARAMS); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 864ae17525e6b2..701f46ac663120 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1097,7 +1097,7 @@ dummy_func(void) { } } - op(_CALL_TUPLE_1, (callable, null, arg -- res)) { + op(_CALL_TUPLE_1, (callable, null, arg -- res, a)) { if (sym_matches_type(arg, &PyTuple_Type)) { // e.g. tuple((1, 2)) or tuple(foo) where foo is known to be a tuple // Note: we must strip the reference information because it goes @@ -1107,6 +1107,7 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyTuple_Type); } + a = arg; } op(_GUARD_TOS_LIST, (tos -- tos)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 20192ebc593054..f19ecb898a24cb 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2882,6 +2882,7 @@ case _CALL_TUPLE_1: { JitOptRef arg; JitOptRef res; + JitOptRef a; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyTuple_Type)) { res = PyJitRef_StripReferenceInfo(arg); @@ -2889,9 +2890,11 @@ else { res = sym_new_type(ctx, &PyTuple_Type); } - CHECK_STACK_BOUNDS(-2); + a = arg; + CHECK_STACK_BOUNDS(-1); stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = a; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }