From bb23e4bf5ee51e2067dcbac8e5d384e7164f0d37 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 18:38:23 +0900 Subject: [PATCH 01/18] gh-115999: Enable BINARY_SUBSCR_GETITEM for free-threaded build --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_typeobject.h | 3 ++ Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/test_opcache.py | 18 +++++++- Objects/typeobject.c | 56 +++++++++++++++++++++++ Python/bytecodes.c | 9 ++-- Python/executor_cases.c.h | 16 +++++-- Python/generated_cases.c.h | 13 ++++-- Python/specialize.c | 18 +++----- 9 files changed, 108 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 81dde66a6f26c2..1b0874b24f8bf3 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1951,7 +1951,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 7b39d07f976ee3..abb94248526556 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -278,6 +278,9 @@ typedef int (*_py_validate_type)(PyTypeObject *); // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); +extern PyObject *_PyType_GetItemFromCache(PyTypeObject *type); +extern PyObject *_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); +extern int _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor); #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 89fce193f40bd8..d95be4beb59d5a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -88,8 +88,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG, - [_BINARY_SUBSCR_INIT_CALL] = 0, + [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_INIT_CALL] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 50b5f365165921..cc2de3b46725bc 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -609,7 +609,7 @@ def assert_races_do_not_crash( for writer in writers: writer.join() - @requires_specialization + @requires_specialization_ft def test_binary_subscr_getitem(self): def get_items(): class C: @@ -1069,7 +1069,7 @@ def write(items): opname = "STORE_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_unpack_sequence_list(self): def get_items(): items = [] @@ -1520,6 +1520,20 @@ def binary_subscr_str_int(): self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + def binary_subscr_getitems(): + class C: + def __init__(self, val): + self.val = val + def __getitem__(self, item): + return self.val + items = [C(i) for i in range(100)] + for i in range(100): + self.assertEqual(items[i][i], i) + + binary_subscr_getitems() + self.assert_specialized(binary_subscr_getitems, "BINARY_SUBSCR_GETITEM") + self.assert_no_opcode(binary_subscr_getitems, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cc95b9857e3f2d..5d61d0d08442ce 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5692,6 +5692,62 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return can_cache; } +int +_PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor) +{ + int can_cache = 0; + if (!type) { + return -1; + } + BEGIN_TYPE_LOCK(); + // This pointer is invalidated by PyType_Modified (see the comment on + // struct _specialization_cache): + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + PyFunctionObject *func = (PyFunctionObject *)descriptor; + uint32_t version = _PyFunction_GetVersionForCurrentState(func); + if (!_PyFunction_IsVersionValid(version)) { + can_cache = -1; + goto end; + } + #ifdef Py_GIL_DISABLED + can_cache = _PyObject_HasDeferredRefcount(descriptor); + #else + can_cache = 0; + #endif + FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); + FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); +end: + END_TYPE_LOCK(); + return can_cache; +} + +PyObject * +_PyType_GetItemFromCache(PyTypeObject *type) +{ + PyObject *res = NULL; + BEGIN_TYPE_LOCK(); + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + res = ht->_spec_cache.getitem; + END_TYPE_LOCK(); + return res; +} + +PyObject * +_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version) +{ + PyObject *res = NULL; + BEGIN_TYPE_LOCK(); + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + res = ht->_spec_cache.getitem; + if (res == NULL) { + goto end; + } + *version = ht->_spec_cache.getitem_version; +end: + END_TYPE_LOCK(); + return res; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3d280941b35244..56620911486c60 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -867,11 +867,10 @@ dummy_func( op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + uint32_t cached_version; + PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); DEOPT_IF(getitem == NULL); assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); @@ -881,8 +880,8 @@ dummy_func( op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + PyObject *getitem = _PyType_GetItemFromCache(tp); + DEOPT_IF(getitem == NULL); new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 987ff2e6419669..68c8cb170b7ac2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1131,14 +1131,15 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + uint32_t cached_version; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); + stack_pointer = _PyFrame_GetStackPointer(frame); if (getitem == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; if (((PyFunctionObject *)getitem)->func_version != cached_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1160,8 +1161,13 @@ sub = stack_pointer[-1]; container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCache(tp); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (getitem == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 33f32aba1e5145..f013e165161fe9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -517,11 +517,12 @@ container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + uint32_t cached_version; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); + stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(getitem == NULL, BINARY_SUBSCR); assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); @@ -532,8 +533,10 @@ { sub = stack_pointer[-1]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCache(tp); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(getitem == NULL, BINARY_SUBSCR); new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/specialize.c b/Python/specialize.c index d3fea717243847..a9c92ee655bd95 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1085,6 +1085,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METHOD); return -1; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER); return -1; @@ -1154,6 +1155,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na if (version == 0) { return -1; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER); return -1; @@ -1761,9 +1763,7 @@ _Py_Specialize_BinarySubscr( specialized_op = BINARY_SUBSCR_DICT; goto success; } -#ifndef Py_GIL_DISABLED - PyTypeObject *cls = Py_TYPE(container); - PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); + PyObject *descriptor = _PyType_Lookup(container_type, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); @@ -1780,24 +1780,18 @@ _Py_Specialize_BinarySubscr( SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); goto fail; } - uint32_t version = _PyFunction_GetVersionForCurrentState(func); - if (!_PyFunction_IsVersionValid(version)) { + if (_PyType_CacheGetItemForSpecialization(container_type, descriptor) < 0) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OTHER); goto fail; } - PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type; - // This pointer is invalidated by PyType_Modified (see the comment on - // struct _specialization_cache): - ht->_spec_cache.getitem = descriptor; - ht->_spec_cache.getitem_version = version; specialized_op = BINARY_SUBSCR_GETITEM; goto success; } -#endif // Py_GIL_DISABLED SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); fail: @@ -2606,6 +2600,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR ); + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER); goto failure; @@ -2634,6 +2629,7 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr) assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND); PyTypeObject *tp = Py_TYPE(receiver); if (tp == &PyGen_Type || tp == &PyCoro_Type) { + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(SEND, SPEC_FAIL_OTHER); goto failure; From b714963eda575e35d45d54b04715b2315c324d8f Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 18:59:16 +0900 Subject: [PATCH 02/18] fix --- Objects/typeobject.c | 11 +++-------- Programs/test_frozenmain.h | 14 +++++++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5d61d0d08442ce..aeaeed1a57322c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5695,7 +5695,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, int _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor) { - int can_cache = 0; + int ret = 0; if (!type) { return -1; } @@ -5706,19 +5706,14 @@ _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor) PyFunctionObject *func = (PyFunctionObject *)descriptor; uint32_t version = _PyFunction_GetVersionForCurrentState(func); if (!_PyFunction_IsVersionValid(version)) { - can_cache = -1; + ret = -1; goto end; } - #ifdef Py_GIL_DISABLED - can_cache = _PyObject_HasDeferredRefcount(descriptor); - #else - can_cache = 0; - #endif FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); end: END_TYPE_LOCK(); - return can_cache; + return ret; } PyObject * diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index c936622c020e3c..d82decbeca39c1 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -12,26 +12,26 @@ unsigned char M_test_frozenmain[] = { 0,0,111,6,88,2,31,0,79,5,88,6,12,0,79,6, 88,5,88,6,2,0,0,0,12,0,47,4,49,1,0,0, 0,0,0,0,29,0,72,22,0,0,9,0,29,0,79,0, - 33,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, - 108,108,111,32,87,111,114,108,100,122,8,115,121,115,46,97, + 33,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, + 108,108,111,32,87,111,114,108,100,218,8,115,121,115,46,97, 114,103,118,218,6,99,111,110,102,105,103,41,5,218,12,112, 114,111,103,114,97,109,95,110,97,109,101,218,10,101,120,101, 99,117,116,97,98,108,101,218,15,117,115,101,95,101,110,118, 105,114,111,110,109,101,110,116,218,17,99,111,110,102,105,103, 117,114,101,95,99,95,115,116,100,105,111,218,14,98,117,102, - 102,101,114,101,100,95,115,116,100,105,111,122,7,99,111,110, - 102,105,103,32,122,2,58,32,41,7,218,3,115,121,115,218, + 102,101,114,101,100,95,115,116,100,105,111,218,7,99,111,110, + 102,105,103,32,218,2,58,32,41,7,218,3,115,121,115,218, 17,95,116,101,115,116,105,110,116,101,114,110,97,108,99,97, 112,105,218,5,112,114,105,110,116,218,4,97,114,103,118,218, - 11,103,101,116,95,99,111,110,102,105,103,115,114,2,0,0, + 11,103,101,116,95,99,111,110,102,105,103,115,114,4,0,0, 0,218,3,107,101,121,169,0,243,0,0,0,0,218,18,116, 101,115,116,95,102,114,111,122,101,110,109,97,105,110,46,112, - 121,218,8,60,109,111,100,117,108,101,62,114,17,0,0,0, + 121,218,8,60,109,111,100,117,108,101,62,114,21,0,0,0, 1,0,0,0,115,94,0,0,0,240,3,1,1,1,243,8, 0,1,11,219,0,24,225,0,5,208,6,26,212,0,27,217, 0,5,128,106,144,35,151,40,145,40,212,0,27,216,9,26, 215,9,38,210,9,38,211,9,40,168,24,209,9,50,128,6, 243,2,6,12,2,128,67,241,14,0,5,10,136,71,144,67, 144,53,152,2,152,54,160,35,153,59,152,45,208,10,40,214, - 4,41,243,15,6,12,2,114,15,0,0,0, + 4,41,243,15,6,12,2,114,19,0,0,0, }; From 9dd71033e8501085e045f65a9d1d1564d752b8f1 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 19:09:00 +0900 Subject: [PATCH 03/18] fix --- Programs/test_frozenmain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index d82decbeca39c1..c936622c020e3c 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -12,26 +12,26 @@ unsigned char M_test_frozenmain[] = { 0,0,111,6,88,2,31,0,79,5,88,6,12,0,79,6, 88,5,88,6,2,0,0,0,12,0,47,4,49,1,0,0, 0,0,0,0,29,0,72,22,0,0,9,0,29,0,79,0, - 33,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, - 108,108,111,32,87,111,114,108,100,218,8,115,121,115,46,97, + 33,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, + 108,108,111,32,87,111,114,108,100,122,8,115,121,115,46,97, 114,103,118,218,6,99,111,110,102,105,103,41,5,218,12,112, 114,111,103,114,97,109,95,110,97,109,101,218,10,101,120,101, 99,117,116,97,98,108,101,218,15,117,115,101,95,101,110,118, 105,114,111,110,109,101,110,116,218,17,99,111,110,102,105,103, 117,114,101,95,99,95,115,116,100,105,111,218,14,98,117,102, - 102,101,114,101,100,95,115,116,100,105,111,218,7,99,111,110, - 102,105,103,32,218,2,58,32,41,7,218,3,115,121,115,218, + 102,101,114,101,100,95,115,116,100,105,111,122,7,99,111,110, + 102,105,103,32,122,2,58,32,41,7,218,3,115,121,115,218, 17,95,116,101,115,116,105,110,116,101,114,110,97,108,99,97, 112,105,218,5,112,114,105,110,116,218,4,97,114,103,118,218, - 11,103,101,116,95,99,111,110,102,105,103,115,114,4,0,0, + 11,103,101,116,95,99,111,110,102,105,103,115,114,2,0,0, 0,218,3,107,101,121,169,0,243,0,0,0,0,218,18,116, 101,115,116,95,102,114,111,122,101,110,109,97,105,110,46,112, - 121,218,8,60,109,111,100,117,108,101,62,114,21,0,0,0, + 121,218,8,60,109,111,100,117,108,101,62,114,17,0,0,0, 1,0,0,0,115,94,0,0,0,240,3,1,1,1,243,8, 0,1,11,219,0,24,225,0,5,208,6,26,212,0,27,217, 0,5,128,106,144,35,151,40,145,40,212,0,27,216,9,26, 215,9,38,210,9,38,211,9,40,168,24,209,9,50,128,6, 243,2,6,12,2,128,67,241,14,0,5,10,136,71,144,67, 144,53,152,2,152,54,160,35,153,59,152,45,208,10,40,214, - 4,41,243,15,6,12,2,114,19,0,0,0, + 4,41,243,15,6,12,2,114,15,0,0,0, }; From 2953fb449de06b428632b6d74ee792625d418655 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 19:44:57 +0900 Subject: [PATCH 04/18] fix --- Objects/typeobject.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index aeaeed1a57322c..92e7bf7d46fe8d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5723,6 +5723,9 @@ _PyType_GetItemFromCache(PyTypeObject *type) BEGIN_TYPE_LOCK(); PyHeapTypeObject *ht = (PyHeapTypeObject *)type; res = ht->_spec_cache.getitem; + if (res == NULL || !PyFunction_Check(res)) { + res = NULL; + } END_TYPE_LOCK(); return res; } @@ -5737,6 +5740,10 @@ _PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version) if (res == NULL) { goto end; } + if (!PyFunction_Check(res)) { + res = NULL; + goto end; + } *version = ht->_spec_cache.getitem_version; end: END_TYPE_LOCK(); From a059e688e05b3bd0f4d15c0abc9623ed00db5d1b Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 20:09:37 +0900 Subject: [PATCH 05/18] Fix for windows build --- Include/internal/pycore_typeobject.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index abb94248526556..c54ec010627a18 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -277,9 +277,9 @@ typedef int (*_py_validate_type)(PyTypeObject *); // It will verify the ``ty`` through user-defined validation function ``validate``, // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. -extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); extern PyObject *_PyType_GetItemFromCache(PyTypeObject *type); -extern PyObject *_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); +PyAPI_FUNC(int) _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); +PyAPI_FUNC(PyObject *) _PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); extern int _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor); #ifdef __cplusplus From d0a9f07ecf6e0f0dcb0749e80925bb588d07c976 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 20:26:58 +0900 Subject: [PATCH 06/18] Revert "Fix for windows build" This reverts commit a059e688e05b3bd0f4d15c0abc9623ed00db5d1b. --- Include/internal/pycore_typeobject.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index c54ec010627a18..abb94248526556 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -277,9 +277,9 @@ typedef int (*_py_validate_type)(PyTypeObject *); // It will verify the ``ty`` through user-defined validation function ``validate``, // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. +extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); extern PyObject *_PyType_GetItemFromCache(PyTypeObject *type); -PyAPI_FUNC(int) _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); -PyAPI_FUNC(PyObject *) _PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); +extern PyObject *_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); extern int _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor); #ifdef __cplusplus From 55aa2407ec7d602f790743dbc89e2401ee9cdff3 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Dec 2024 00:34:19 +0900 Subject: [PATCH 07/18] Address partial review --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_typeobject.h | 4 +-- Include/internal/pycore_uop_metadata.h | 4 +-- Objects/typeobject.c | 40 +---------------------- Python/bytecodes.c | 8 +++-- Python/executor_cases.c.h | 12 +++---- Python/generated_cases.c.h | 12 +++---- Python/specialize.c | 4 ++- 8 files changed, 23 insertions(+), 63 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1b0874b24f8bf3..81dde66a6f26c2 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1951,7 +1951,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index abb94248526556..3527c680738265 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -278,9 +278,7 @@ typedef int (*_py_validate_type)(PyTypeObject *); // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); -extern PyObject *_PyType_GetItemFromCache(PyTypeObject *type); -extern PyObject *_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); -extern int _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor); +extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor); #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index d95be4beb59d5a..7107149afd3abc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -88,8 +88,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_INIT_CALL] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_INIT_CALL] = HAS_DEOPT_FLAG, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 92e7bf7d46fe8d..103a596f171b18 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5693,16 +5693,12 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, } int -_PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor) +_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor) { int ret = 0; - if (!type) { - return -1; - } BEGIN_TYPE_LOCK(); // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): - PyHeapTypeObject *ht = (PyHeapTypeObject *)type; PyFunctionObject *func = (PyFunctionObject *)descriptor; uint32_t version = _PyFunction_GetVersionForCurrentState(func); if (!_PyFunction_IsVersionValid(version)) { @@ -5716,40 +5712,6 @@ _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor) return ret; } -PyObject * -_PyType_GetItemFromCache(PyTypeObject *type) -{ - PyObject *res = NULL; - BEGIN_TYPE_LOCK(); - PyHeapTypeObject *ht = (PyHeapTypeObject *)type; - res = ht->_spec_cache.getitem; - if (res == NULL || !PyFunction_Check(res)) { - res = NULL; - } - END_TYPE_LOCK(); - return res; -} - -PyObject * -_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version) -{ - PyObject *res = NULL; - BEGIN_TYPE_LOCK(); - PyHeapTypeObject *ht = (PyHeapTypeObject *)type; - res = ht->_spec_cache.getitem; - if (res == NULL) { - goto end; - } - if (!PyFunction_Check(res)) { - res = NULL; - goto end; - } - *version = ht->_spec_cache.getitem_version; -end: - END_TYPE_LOCK(); - return res; -} - static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 56620911486c60..e85ee77a24ba38 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -867,10 +867,11 @@ dummy_func( op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); - uint32_t cached_version; - PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); DEOPT_IF(getitem == NULL); assert(PyFunction_Check(getitem)); + uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); @@ -880,7 +881,8 @@ dummy_func( op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyObject *getitem = _PyType_GetItemFromCache(tp); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); DEOPT_IF(getitem == NULL); new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 68c8cb170b7ac2..95d8cfa362a81b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1131,15 +1131,14 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - uint32_t cached_version; - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); if (getitem == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(PyFunction_Check(getitem)); + uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); if (((PyFunctionObject *)getitem)->func_version != cached_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1161,9 +1160,8 @@ sub = stack_pointer[-1]; container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *getitem = _PyType_GetItemFromCache(tp); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); if (getitem == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f013e165161fe9..e67fbeccd3e347 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -517,12 +517,11 @@ container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); - uint32_t cached_version; - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); DEOPT_IF(getitem == NULL, BINARY_SUBSCR); assert(PyFunction_Check(getitem)); + uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); @@ -533,9 +532,8 @@ { sub = stack_pointer[-1]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *getitem = _PyType_GetItemFromCache(tp); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); DEOPT_IF(getitem == NULL, BINARY_SUBSCR); new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; diff --git a/Python/specialize.c b/Python/specialize.c index a9c92ee655bd95..410799b7849354 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1780,7 +1780,9 @@ _Py_Specialize_BinarySubscr( SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); goto fail; } - if (_PyType_CacheGetItemForSpecialization(container_type, descriptor) < 0) { + + PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type; + if (_PyType_CacheGetItemForSpecialization(ht, descriptor) < 0) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } From 245d5805eb253142dda2a1daeb1dc35fa13f4cc1 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Dec 2024 00:42:21 +0900 Subject: [PATCH 08/18] fix --- Objects/typeobject.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 103a596f171b18..86c0363f8da5be 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5700,6 +5700,12 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): PyFunctionObject *func = (PyFunctionObject *)descriptor; +#ifdef Py_GIL_DISABLED + if (!_PyObject_HasDeferredRefcount(func)) { + ret = -1; + goto end; + } +#endif uint32_t version = _PyFunction_GetVersionForCurrentState(func); if (!_PyFunction_IsVersionValid(version)) { ret = -1; From 58c2289ff3d7567d61a544394ca42b075b43f847 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Dec 2024 00:45:20 +0900 Subject: [PATCH 09/18] fix --- Objects/typeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 86c0363f8da5be..223b9b03526d51 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5701,7 +5701,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor // struct _specialization_cache): PyFunctionObject *func = (PyFunctionObject *)descriptor; #ifdef Py_GIL_DISABLED - if (!_PyObject_HasDeferredRefcount(func)) { + if (!_PyObject_HasDeferredRefcount(descriptor)) { ret = -1; goto end; } From 23d5d49b262fcb51edab11edb5a13b7e5f53454e Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Dec 2024 23:21:35 +0900 Subject: [PATCH 10/18] WIP --- Include/internal/pycore_typeobject.h | 2 +- Objects/typeobject.c | 25 +++++++++++-------------- Python/specialize.c | 19 ++++++++++++------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 3527c680738265..e19298869ec207 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -278,7 +278,7 @@ typedef int (*_py_validate_type)(PyTypeObject *); // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); -extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor); +extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t version); #ifdef __cplusplus } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 223b9b03526d51..e6229795738294 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5693,29 +5693,26 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, } int -_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor) +_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t version) { - int ret = 0; + if (!descriptor || !version) { + return 0; + } + int can_cache; BEGIN_TYPE_LOCK(); // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): PyFunctionObject *func = (PyFunctionObject *)descriptor; + can_cache = _PyFunction_GetVersionForCurrentState(func) == version; #ifdef Py_GIL_DISABLED - if (!_PyObject_HasDeferredRefcount(descriptor)) { - ret = -1; - goto end; - } + can_cache = can_cache && _PyObject_HasDeferredRefcount(descriptor); #endif - uint32_t version = _PyFunction_GetVersionForCurrentState(func); - if (!_PyFunction_IsVersionValid(version)) { - ret = -1; - goto end; + if (can_cache) { + FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); + FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); } - FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); - FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); -end: END_TYPE_LOCK(); - return ret; + return can_cache; } static void diff --git a/Python/specialize.c b/Python/specialize.c index 410799b7849354..7341a4f2b3ac15 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1763,10 +1763,12 @@ _Py_Specialize_BinarySubscr( specialized_op = BINARY_SUBSCR_DICT; goto success; } - PyObject *descriptor = _PyType_Lookup(container_type, &_Py_ID(__getitem__)); + unsigned int version; + PyObject *descriptor = _PyType_LookupRefAndVersion(container_type, &_Py_ID(__getitem__), &version); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); + Py_DECREF(descriptor); goto fail; } PyFunctionObject *func = (PyFunctionObject *)descriptor; @@ -1774,25 +1776,28 @@ _Py_Specialize_BinarySubscr( int kind = function_kind(fcode); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(BINARY_SUBSCR, kind); + Py_DECREF(descriptor); goto fail; } if (fcode->co_argcount != 2) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + Py_DECREF(descriptor); goto fail; } PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type; - if (_PyType_CacheGetItemForSpecialization(ht, descriptor) < 0) { - SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); - goto fail; - } /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OTHER); + Py_DECREF(descriptor); goto fail; } - specialized_op = BINARY_SUBSCR_GETITEM; - goto success; + if (_PyType_CacheGetItemForSpecialization(ht, descriptor, (uint32_t)version)) { + specialized_op = BINARY_SUBSCR_GETITEM; + Py_DECREF(descriptor); + goto success; + } + Py_DECREF(descriptor); } SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); From 42ed55b8262a55a56e573210c83d6ba45d6d88e8 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Dec 2024 23:41:57 +0900 Subject: [PATCH 11/18] fix --- Lib/test/test_opcache.py | 17 ++++++++++------- Objects/typeobject.c | 8 +++++--- Python/specialize.c | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index cc2de3b46725bc..213d3312ac18e0 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -609,7 +609,7 @@ def assert_races_do_not_crash( for writer in writers: writer.join() - @requires_specialization_ft + @requires_specialization def test_binary_subscr_getitem(self): def get_items(): class C: @@ -1245,6 +1245,14 @@ def f(o, n): f(test_obj, 1) self.assertEqual(test_obj.b, 0) +# gh-115999: BINARY_SUBSCR_GETITEM will only cache __getitem__ methods that +# are deferred. We only defer functions defined at the top-level. +class CGetItem: + def __init__(self, val): + self.val = val + def __getitem__(self, item): + return self.val + class TestSpecializer(TestBase): @@ -1521,12 +1529,7 @@ def binary_subscr_str_int(): self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") def binary_subscr_getitems(): - class C: - def __init__(self, val): - self.val = val - def __getitem__(self, item): - return self.val - items = [C(i) for i in range(100)] + items = [CGetItem(i) for i in range(100)] for i in range(100): self.assertEqual(items[i][i], i) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e6229795738294..e8de2ed50732dc 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5693,17 +5693,19 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, } int -_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t version) +_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version) { - if (!descriptor || !version) { + if (!descriptor || !tp_version) { return 0; } int can_cache; BEGIN_TYPE_LOCK(); + can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version; // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): PyFunctionObject *func = (PyFunctionObject *)descriptor; - can_cache = _PyFunction_GetVersionForCurrentState(func) == version; + uint32_t version = _PyFunction_GetVersionForCurrentState(func); + can_cache = _PyFunction_IsVersionValid(version); #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(descriptor); #endif diff --git a/Python/specialize.c b/Python/specialize.c index 7341a4f2b3ac15..2397d55f003302 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1797,8 +1797,8 @@ _Py_Specialize_BinarySubscr( Py_DECREF(descriptor); goto success; } - Py_DECREF(descriptor); } + Py_XDECREF(descriptor); SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); fail: From c555fcd6cb81a9edb8176cff26de0d02954a483d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 16 Dec 2024 23:43:56 +0900 Subject: [PATCH 12/18] fix --- Include/internal/pycore_typeobject.h | 2 +- Python/specialize.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index e19298869ec207..581153344a8e05 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -278,7 +278,7 @@ typedef int (*_py_validate_type)(PyTypeObject *); // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); -extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t version); +extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version); #ifdef __cplusplus } diff --git a/Python/specialize.c b/Python/specialize.c index 2397d55f003302..e43a1a73a177ef 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1763,8 +1763,8 @@ _Py_Specialize_BinarySubscr( specialized_op = BINARY_SUBSCR_DICT; goto success; } - unsigned int version; - PyObject *descriptor = _PyType_LookupRefAndVersion(container_type, &_Py_ID(__getitem__), &version); + unsigned int tp_version; + PyObject *descriptor = _PyType_LookupRefAndVersion(container_type, &_Py_ID(__getitem__), &tp_version); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); @@ -1792,7 +1792,7 @@ _Py_Specialize_BinarySubscr( Py_DECREF(descriptor); goto fail; } - if (_PyType_CacheGetItemForSpecialization(ht, descriptor, (uint32_t)version)) { + if (_PyType_CacheGetItemForSpecialization(ht, descriptor, (uint32_t)tp_version)) { specialized_op = BINARY_SUBSCR_GETITEM; Py_DECREF(descriptor); goto success; From 7b72e7b8a38f88d185311c3254f7a86a8e6d964a Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 17 Dec 2024 00:45:30 +0900 Subject: [PATCH 13/18] fix --- Objects/typeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e8de2ed50732dc..9e10f257f9921b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5705,7 +5705,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor // struct _specialization_cache): PyFunctionObject *func = (PyFunctionObject *)descriptor; uint32_t version = _PyFunction_GetVersionForCurrentState(func); - can_cache = _PyFunction_IsVersionValid(version); + can_cache = can_cache && _PyFunction_IsVersionValid(version); #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(descriptor); #endif From f9509d32c7ffe5928eafe493adc3baabc9699645 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 17 Dec 2024 09:35:21 +0900 Subject: [PATCH 14/18] Address code review --- Include/internal/pycore_opcode_metadata.h | 4 +-- Include/internal/pycore_uop_metadata.h | 4 +-- Lib/test/test_opcache.py | 2 +- Python/bytecodes.c | 21 ++++++-------- Python/executor_cases.c.h | 34 +++++++++++------------ Python/generated_cases.c.h | 18 ++++++------ Python/optimizer_cases.c.h | 5 ++++ 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 81dde66a6f26c2..d0c97be9f51582 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -994,7 +994,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { return 0; } case BINARY_SUBSCR: { - *effect = 0; + *effect = 1; return 0; } case BINARY_SUBSCR_DICT: { @@ -1002,7 +1002,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { return 0; } case BINARY_SUBSCR_GETITEM: { - *effect = 0; + *effect = 1; return 0; } case BINARY_SUBSCR_LIST_INT: { diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7107149afd3abc..1109d33e1d4a8b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -89,7 +89,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG, - [_BINARY_SUBSCR_INIT_CALL] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_INIT_CALL] = 0, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -720,7 +720,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _BINARY_SUBSCR_CHECK_FUNC: return 0; case _BINARY_SUBSCR_INIT_CALL: - return 2; + return 3; case _LIST_APPEND: return 1; case _SET_ADD: diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 213d3312ac18e0..84151a91107ba2 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1245,7 +1245,7 @@ def f(o, n): f(test_obj, 1) self.assertEqual(test_obj.b, 0) -# gh-115999: BINARY_SUBSCR_GETITEM will only cache __getitem__ methods that +# gh-127274: BINARY_SUBSCR_GETITEM will only cache __getitem__ methods that # are deferred. We only defer functions defined at the top-level. class CGetItem: def __init__(self, val): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e85ee77a24ba38..36be6b41cda4a4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -864,27 +864,24 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { + op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused, getitem)) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); - DEOPT_IF(getitem == NULL); - assert(PyFunction_Check(getitem)); + PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem); + DEOPT_IF(getitem_o == NULL); + assert(PyFunction_Check(getitem_o)); uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); - DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); + getitem = PyStackRef_FromPyObjectNew(getitem_o); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(BINARY_SUBSCR, hit); } - op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { - PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); - DEOPT_IF(getitem == NULL); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); + op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _PyInterpreterFrame* )) { + new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; INPUTS_DEAD(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 95d8cfa362a81b..7f2d6816c51953 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1125,6 +1125,7 @@ case _BINARY_SUBSCR_CHECK_FUNC: { _PyStackRef container; + _PyStackRef getitem; container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) { @@ -1132,46 +1133,45 @@ JUMP_TO_JUMP_TARGET(); } PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); - if (getitem == NULL) { + PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem); + if (getitem_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(PyFunction_Check(getitem)); + assert(PyFunction_Check(getitem_o)); uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); - if (((PyFunctionObject *)getitem)->func_version != cached_version) { + if (((PyFunctionObject *)getitem_o)->func_version != cached_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); + getitem = PyStackRef_FromPyObjectNew(getitem_o); assert(code->co_argcount == 2); if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } STAT_INC(BINARY_SUBSCR, hit); + stack_pointer[0] = getitem; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_SUBSCR_INIT_CALL: { + _PyStackRef getitem; _PyStackRef sub; _PyStackRef container; _PyInterpreterFrame *new_frame; - sub = stack_pointer[-1]; - container = stack_pointer[-2]; - PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); - if (getitem == NULL) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); - } - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); + getitem = stack_pointer[-1]; + sub = stack_pointer[-2]; + container = stack_pointer[-3]; + new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = 2 ; - stack_pointer[-2].bits = (uintptr_t)new_frame; - stack_pointer += -1; + stack_pointer[-3].bits = (uintptr_t)new_frame; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e67fbeccd3e347..831e36e626647a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -505,6 +505,7 @@ INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM); static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); _PyStackRef container; + _PyStackRef getitem; _PyStackRef sub; _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ @@ -518,12 +519,13 @@ PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); - DEOPT_IF(getitem == NULL, BINARY_SUBSCR); - assert(PyFunction_Check(getitem)); + PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem); + DEOPT_IF(getitem_o == NULL, BINARY_SUBSCR); + assert(PyFunction_Check(getitem_o)); uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); - DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version, BINARY_SUBSCR); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); + getitem = PyStackRef_FromPyObjectNew(getitem_o); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); @@ -531,11 +533,7 @@ // _BINARY_SUBSCR_INIT_CALL { sub = stack_pointer[-1]; - PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = FT_ATOMIC_LOAD_PTR_RELAXED(ht->_spec_cache.getitem); - DEOPT_IF(getitem == NULL, BINARY_SUBSCR); - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); + new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = 2 ; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f77a5aa35bdf82..e720fe9a0c1a09 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -592,6 +592,11 @@ } case _BINARY_SUBSCR_CHECK_FUNC: { + _Py_UopsSymbol *getitem; + getitem = sym_new_not_null(ctx); + stack_pointer[0] = getitem; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } From e3227e0c008b18521271966a3e5de1b2f02692f0 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 17 Dec 2024 10:07:09 +0900 Subject: [PATCH 15/18] Update --- Programs/test_frozenmain.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index c936622c020e3c..4b15829a6e51ed 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,6 +1,6 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { - 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, + 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, 0,0,0,0,0,243,168,0,0,0,149,0,89,0,79,0, 70,0,111,0,89,0,79,0,70,1,111,1,88,2,31,0, 79,1,49,1,0,0,0,0,0,0,29,0,88,2,31,0, @@ -12,26 +12,26 @@ unsigned char M_test_frozenmain[] = { 0,0,111,6,88,2,31,0,79,5,88,6,12,0,79,6, 88,5,88,6,2,0,0,0,12,0,47,4,49,1,0,0, 0,0,0,0,29,0,72,22,0,0,9,0,29,0,79,0, - 33,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, - 108,108,111,32,87,111,114,108,100,122,8,115,121,115,46,97, + 33,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, + 108,108,111,32,87,111,114,108,100,218,8,115,121,115,46,97, 114,103,118,218,6,99,111,110,102,105,103,41,5,218,12,112, 114,111,103,114,97,109,95,110,97,109,101,218,10,101,120,101, 99,117,116,97,98,108,101,218,15,117,115,101,95,101,110,118, 105,114,111,110,109,101,110,116,218,17,99,111,110,102,105,103, 117,114,101,95,99,95,115,116,100,105,111,218,14,98,117,102, - 102,101,114,101,100,95,115,116,100,105,111,122,7,99,111,110, - 102,105,103,32,122,2,58,32,41,7,218,3,115,121,115,218, + 102,101,114,101,100,95,115,116,100,105,111,218,7,99,111,110, + 102,105,103,32,218,2,58,32,41,7,218,3,115,121,115,218, 17,95,116,101,115,116,105,110,116,101,114,110,97,108,99,97, 112,105,218,5,112,114,105,110,116,218,4,97,114,103,118,218, - 11,103,101,116,95,99,111,110,102,105,103,115,114,2,0,0, + 11,103,101,116,95,99,111,110,102,105,103,115,114,4,0,0, 0,218,3,107,101,121,169,0,243,0,0,0,0,218,18,116, 101,115,116,95,102,114,111,122,101,110,109,97,105,110,46,112, - 121,218,8,60,109,111,100,117,108,101,62,114,17,0,0,0, + 121,218,8,60,109,111,100,117,108,101,62,114,21,0,0,0, 1,0,0,0,115,94,0,0,0,240,3,1,1,1,243,8, 0,1,11,219,0,24,225,0,5,208,6,26,212,0,27,217, 0,5,128,106,144,35,151,40,145,40,212,0,27,216,9,26, 215,9,38,210,9,38,211,9,40,168,24,209,9,50,128,6, 243,2,6,12,2,128,67,241,14,0,5,10,136,71,144,67, 144,53,152,2,152,54,160,35,153,59,152,45,208,10,40,214, - 4,41,243,15,6,12,2,114,15,0,0,0, + 4,41,243,15,6,12,2,114,19,0,0,0, }; From 3aa9426a1946285f8039b0aa35948102cd50f87c Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 17 Dec 2024 10:58:01 +0900 Subject: [PATCH 16/18] fix --- Programs/test_frozenmain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 4b15829a6e51ed..99b0fa48e01c8b 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -12,26 +12,26 @@ unsigned char M_test_frozenmain[] = { 0,0,111,6,88,2,31,0,79,5,88,6,12,0,79,6, 88,5,88,6,2,0,0,0,12,0,47,4,49,1,0,0, 0,0,0,0,29,0,72,22,0,0,9,0,29,0,79,0, - 33,0,41,7,78,218,18,70,114,111,122,101,110,32,72,101, - 108,108,111,32,87,111,114,108,100,218,8,115,121,115,46,97, + 33,0,41,7,78,122,18,70,114,111,122,101,110,32,72,101, + 108,108,111,32,87,111,114,108,100,122,8,115,121,115,46,97, 114,103,118,218,6,99,111,110,102,105,103,41,5,218,12,112, 114,111,103,114,97,109,95,110,97,109,101,218,10,101,120,101, 99,117,116,97,98,108,101,218,15,117,115,101,95,101,110,118, 105,114,111,110,109,101,110,116,218,17,99,111,110,102,105,103, 117,114,101,95,99,95,115,116,100,105,111,218,14,98,117,102, - 102,101,114,101,100,95,115,116,100,105,111,218,7,99,111,110, - 102,105,103,32,218,2,58,32,41,7,218,3,115,121,115,218, + 102,101,114,101,100,95,115,116,100,105,111,122,7,99,111,110, + 102,105,103,32,122,2,58,32,41,7,218,3,115,121,115,218, 17,95,116,101,115,116,105,110,116,101,114,110,97,108,99,97, 112,105,218,5,112,114,105,110,116,218,4,97,114,103,118,218, - 11,103,101,116,95,99,111,110,102,105,103,115,114,4,0,0, + 11,103,101,116,95,99,111,110,102,105,103,115,114,2,0,0, 0,218,3,107,101,121,169,0,243,0,0,0,0,218,18,116, 101,115,116,95,102,114,111,122,101,110,109,97,105,110,46,112, - 121,218,8,60,109,111,100,117,108,101,62,114,21,0,0,0, + 121,218,8,60,109,111,100,117,108,101,62,114,17,0,0,0, 1,0,0,0,115,94,0,0,0,240,3,1,1,1,243,8, 0,1,11,219,0,24,225,0,5,208,6,26,212,0,27,217, 0,5,128,106,144,35,151,40,145,40,212,0,27,216,9,26, 215,9,38,210,9,38,211,9,40,168,24,209,9,50,128,6, 243,2,6,12,2,128,67,241,14,0,5,10,136,71,144,67, 144,53,152,2,152,54,160,35,153,59,152,45,208,10,40,214, - 4,41,243,15,6,12,2,114,19,0,0,0, + 4,41,243,15,6,12,2,114,15,0,0,0, }; From 6ef74ac24043eee35bf7c35d1773d6b4464ea2a5 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 18 Dec 2024 12:08:25 +0900 Subject: [PATCH 17/18] Address code review --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 36be6b41cda4a4..e11cb71bdc43ae 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -874,9 +874,9 @@ dummy_func( uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); - getitem = PyStackRef_FromPyObjectNew(getitem_o); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); + getitem = PyStackRef_FromPyObjectNew(getitem_o); STAT_INC(BINARY_SUBSCR, hit); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7f2d6816c51953..efc4f65c7bd4d1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1145,12 +1145,12 @@ JUMP_TO_JUMP_TARGET(); } PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); - getitem = PyStackRef_FromPyObjectNew(getitem_o); assert(code->co_argcount == 2); if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + getitem = PyStackRef_FromPyObjectNew(getitem_o); STAT_INC(BINARY_SUBSCR, hit); stack_pointer[0] = getitem; stack_pointer += 1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 831e36e626647a..d9e46641777682 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -525,9 +525,9 @@ uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); - getitem = PyStackRef_FromPyObjectNew(getitem_o); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); + getitem = PyStackRef_FromPyObjectNew(getitem_o); STAT_INC(BINARY_SUBSCR, hit); } // _BINARY_SUBSCR_INIT_CALL From 47b80b440f98378848d446f43d64b6a9b3d932c2 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 19 Dec 2024 03:46:43 +0900 Subject: [PATCH 18/18] Address code review --- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..bbb7d6008afa55 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -349,9 +349,10 @@ dummy_func(void) { GETLOCAL(this_instr->operand0) = res; } - op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _Py_UOpsAbstractFrame *)) { (void)container; (void)sub; + (void)getitem; new_frame = NULL; ctx->done = true; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index e720fe9a0c1a09..2657ec900710cb 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -601,17 +601,20 @@ } case _BINARY_SUBSCR_INIT_CALL: { + _Py_UopsSymbol *getitem; _Py_UopsSymbol *sub; _Py_UopsSymbol *container; _Py_UOpsAbstractFrame *new_frame; - sub = stack_pointer[-1]; - container = stack_pointer[-2]; + getitem = stack_pointer[-1]; + sub = stack_pointer[-2]; + container = stack_pointer[-3]; (void)container; (void)sub; + (void)getitem; new_frame = NULL; ctx->done = true; - stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; - stack_pointer += -1; + stack_pointer[-3] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; }