From af1172a0c8a3199e0856ef37ba0b4c68d8658733 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Mon, 23 Jun 2025 23:18:07 +0300 Subject: [PATCH 1/5] eliminate refcount from `_CALL_TUPLE_1` --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 10 ++++----- Python/executor_cases.c.h | 16 +++++--------- Python/generated_cases.c.h | 27 ++++++++++++----------- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 5 ++++- 6 files changed, 30 insertions(+), 33 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..862e259e03ec1a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1367,7 +1367,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_PY_EXACT_ARGS] = { .nuops = 8, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_PY_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_STR_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_TUPLE_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, + [CALL_TUPLE_1] = { .nuops = 5, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_TYPE_1] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, OPARG_SIMPLE, 0 } } }, [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { _CHECK_EXC_MATCH, OPARG_SIMPLE, 0 } } }, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 535e552e047475..69d0d46408de99 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4083,17 +4083,14 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); } - op(_CALL_TUPLE_1, (callable, null, arg -- res)) { + op(_CALL_TUPLE_1, (callable, null, arg -- res, a)) { PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); + INPUTS_DEAD(); PyObject *res_o = PySequence_Tuple(arg_o); - DEAD(null); - DEAD(callable); - (void)callable; // Silence compiler warnings about unused variables - (void)null; - PyStackRef_CLOSE(arg); + a = arg; ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4104,6 +4101,7 @@ dummy_func( _GUARD_NOS_NULL + _GUARD_CALLABLE_TUPLE_1 + _CALL_TUPLE_1 + + POP_TOP + _CHECK_PERIODIC; op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 46fc164a5b3bc2..0c36c6d9d537b8 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5486,32 +5486,26 @@ case _CALL_TUPLE_1: { _PyStackRef arg; - _PyStackRef null; - _PyStackRef callable; _PyStackRef res; + _PyStackRef a; oparg = CURRENT_OPARG(); arg = stack_pointer[-1]; - null = stack_pointer[-2]; - callable = stack_pointer[-3]; PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PySequence_Tuple(arg_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); + a = arg; if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; - stack_pointer += 1; + stack_pointer[1] = a; + stack_pointer += 2; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..cd8ccaad16bf40 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4459,6 +4459,8 @@ _PyStackRef callable; _PyStackRef arg; _PyStackRef res; + _PyStackRef a; + _PyStackRef value; /* Skip 1 cache entry */ /* Skip 2 cache entries */ // _GUARD_NOS_NULL @@ -4486,41 +4488,40 @@ PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PySequence_Tuple(arg_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); + a = arg; if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } + // _POP_TOP + { + value = a; + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } // _CHECK_PERIODIC { _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); QSBR_QUIESCENT_STATE(tstate); if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); int err = _Py_HandlePending(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); if (err != 0) { JUMP_TO_LABEL(error); } - stack_pointer += -1; } } - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8a0484bdc2b04..ed8269d58e9d07 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1127,7 +1127,7 @@ dummy_func(void) { } } - op(_CALL_TUPLE_1, (callable, null, arg -- res)) { + op(_CALL_TUPLE_1, (callable, null, arg -- res, a)) { if (sym_matches_type(arg, &PyTuple_Type)) { // e.g. tuple((1, 2)) or tuple(foo) where foo is known to be a tuple res = arg; @@ -1135,6 +1135,7 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyTuple_Type); } + a = arg; } op(_GUARD_TOS_LIST, (tos -- tos)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 10767ccdbd57f5..38ff85504b51b0 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2194,6 +2194,7 @@ case _CALL_TUPLE_1: { JitOptRef arg; JitOptRef res; + JitOptRef a; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyTuple_Type)) { res = arg; @@ -2201,8 +2202,10 @@ else { res = sym_new_type(ctx, &PyTuple_Type); } + a = arg; stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = a; + stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } From 5a805a94635bc5046faf196dad8ca52ef8f8ac11 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Mon, 23 Jun 2025 23:18:12 +0300 Subject: [PATCH 2/5] test case --- Lib/test/test_capi/test_opt.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e4c9a463855a69..8a75dc386c06ae 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1841,6 +1841,21 @@ def testfunc(n): uops = get_opnames(ex) self.assertNotIn("_GUARD_IS_NOT_NONE_POP", uops) + def test_call_tuple_1_pop_top(self): + def testfunc(n): + x = 0 + for _ in range(n): + t = tuple(()) + x += len(t) == 0 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_TUPLE_1", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_call_str_1(self): def testfunc(n): x = 0 From c7c4873daf03708eb0f4cce0df494557dae5d8e9 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Mon, 23 Jun 2025 23:18:20 +0300 Subject: [PATCH 3/5] news entry --- .../2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst new file mode 100644 index 00000000000000..34df14750464ce --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-22-52-20.gh-issue-134584.qbiQfG.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``_CALL_TUPLE_1``. Patch by Noam Cohen From f392667d98ddba1cd94f20bb2385eb2df9bd8bf3 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Thu, 11 Dec 2025 15:11:15 +0200 Subject: [PATCH 4/5] move `INPUTS_DEAD()` to the right place --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 9 +++++++++ Python/generated_cases.c.h | 8 +++----- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6f22dbc617c12c..6da39de5cc2b2f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4073,9 +4073,9 @@ dummy_func( assert(oparg == 1); STAT_INC(CALL, hit); - INPUTS_DEAD(); PyObject *res_o = PySequence_Tuple(arg_o); a = arg; + INPUTS_DEAD(); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 85e2e20d807dbf..248cf60b70e455 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12902,11 +12902,18 @@ PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); + stack_pointer[0] = _stack_item_0; + stack_pointer[1] = _stack_item_1; + stack_pointer[2] = arg; + stack_pointer += 3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); a = arg; if (res_o == NULL) { + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } @@ -12915,6 +12922,8 @@ _tos_cache0 = res; _tos_cache2 = PyStackRef_ZERO_BITS; SET_CURRENT_CACHED_VALUES(2); + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a4797dc5ac5973..41ca796d053837 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4058,22 +4058,20 @@ PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); a = arg; if (res_o == NULL) { - JUMP_TO_LABEL(error); + JUMP_TO_LABEL(pop_3_error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _POP_TOP { value = a; - stack_pointer[0] = res; - stack_pointer += 1; + stack_pointer[-3] = res; + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(value); From 2aaafd2dcee7e690355ace4a4d040e9bde57c331 Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Thu, 11 Dec 2025 15:26:36 +0200 Subject: [PATCH 5/5] add `pop_3_error` --- Python/bytecodes.c | 6 ++++++ Python/generated_cases.c.h | 7 +++++++ Python/opcode_targets.h | 1 + 3 files changed, 14 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6da39de5cc2b2f..3cc6ff842c9202 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5453,6 +5453,12 @@ dummy_func( } } + label(pop_3_error) { + stack_pointer -= 3; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } + label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 41ca796d053837..b70b7fde37a72a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -11852,6 +11852,13 @@ JUMP_TO_LABEL(error); #endif /* _Py_TAIL_CALL_INTERP */ /* BEGIN LABELS */ + LABEL(pop_3_error) + { + stack_pointer -= 3; + assert(WITHIN_STACK_BOUNDS()); + JUMP_TO_LABEL(error); + } + LABEL(pop_2_error) { stack_pointer -= 2; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index b2fa7d01e8f6c2..1a8a4392187eca 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -522,6 +522,7 @@ static py_tail_call_funcptr instruction_funcptr_handler_table[256]; static py_tail_call_funcptr instruction_funcptr_tracing_table[256]; +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_3_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_2_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_1_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_error(TAIL_CALL_PARAMS);