From 56d3ed1b25882e0fe210a4d68f1d55805d61e302 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Fri, 20 Sep 2024 11:56:07 +0900 Subject: [PATCH 1/8] optimize PyType_GetBaseByToken() dedicatedly Unlike PyType_GetModuleByDef(), this uses PyType_HasFeature() for differentiation. --- Objects/typeobject.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 68e481f8e5163b..1501b825e824a9 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5297,7 +5297,7 @@ get_base_by_token_recursive(PyTypeObject *type, void *token) Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, i)); - if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { + if (!PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { @@ -5328,7 +5328,7 @@ get_base_by_token_from_mro(PyTypeObject *type, void *token) Py_ssize_t n = PyTuple_GET_SIZE(mro); for (Py_ssize_t i = 1; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(mro, i)); - if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { + if (!PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { @@ -5340,24 +5340,25 @@ get_base_by_token_from_mro(PyTypeObject *type, void *token) static int check_base_by_token(PyTypeObject *type, void *token) { - // Chain the branches, which will be optimized exclusive here if (token == NULL) { PyErr_Format(PyExc_SystemError, "PyType_GetBaseByToken called with token=NULL"); return -1; } - else if (!PyType_Check(type)) { + if (!PyType_Check(type)) { PyErr_Format(PyExc_TypeError, "expected a type, got a '%T' object", type); return -1; } - else if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + // Ensure the flag is checked by a function, not directly. PyType_Check() + // above also uses PyType_HasFeature() instead of the internal duplicate. + if (!PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { return 0; } - else if (((PyHeapTypeObject*)type)->ht_token == token) { + if (((PyHeapTypeObject*)type)->ht_token == token) { return 1; } - else if (type->tp_mro != NULL) { + if (type->tp_mro != NULL) { // This will not be inlined return get_base_by_token_from_mro(type, token) ? 1 : 0; } @@ -5374,14 +5375,15 @@ PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) // branches will become trivial to optimize. return check_base_by_token(type, token); } + PyTypeObject *base; + // Chain the branches below (not above), which will avoid being less + // pgoptimized by MSVC together with check_base_by_token() and other + // functions, unless they have if-else statements with similar conds. if (token == NULL || !PyType_Check(type)) { *result = NULL; return check_base_by_token(type, token); } - - // Chain the branches, which will be optimized exclusive here - PyTypeObject *base; - if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + else if (!PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // No static type has a heaptype superclass, // which is ensured by type_ready_mro(). *result = NULL; From f7db086bcbce5425ac13ace68eeec0c31fc23953 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Fri, 20 Sep 2024 12:00:47 +0900 Subject: [PATCH 2/8] update defdict_or() in _collectionsmodule.c --- Modules/_collectionsmodule.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index fbfed59995c21e..aef04248c7e73c 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -2179,6 +2179,8 @@ typedef struct { PyObject *default_factory; } defdictobject; +static PyType_Spec defdict_spec; + PyDoc_STRVAR(defdict_missing_doc, "__missing__(key) # Called by __getitem__ for missing key; pseudo-code:\n\ if self.default_factory is None: raise KeyError((key,))\n\ @@ -2358,23 +2360,16 @@ defdict_or(PyObject* left, PyObject* right) { PyObject *self, *other; - // Find module state - PyTypeObject *tp = Py_TYPE(left); - PyObject *mod = PyType_GetModuleByDef(tp, &_collectionsmodule); - if (mod == NULL) { - PyErr_Clear(); - tp = Py_TYPE(right); - mod = PyType_GetModuleByDef(tp, &_collectionsmodule); + int ret = PyType_GetBaseByToken(Py_TYPE(left), &defdict_spec, NULL); + if (ret < 0) { + return NULL; } - assert(mod != NULL); - collections_state *state = get_module_state(mod); - - if (PyObject_TypeCheck(left, state->defdict_type)) { + if (ret) { self = left; other = right; } else { - assert(PyObject_TypeCheck(right, state->defdict_type)); + assert(PyType_GetBaseByToken(Py_TYPE(right), &defdict_spec, NULL) == 1); self = right; other = left; } @@ -2454,6 +2449,7 @@ passed to the dict constructor, including keyword arguments.\n\ #define DEFERRED_ADDRESS(ADDR) 0 static PyType_Slot defdict_slots[] = { + {Py_tp_token, Py_TP_USE_SPEC}, {Py_tp_dealloc, defdict_dealloc}, {Py_tp_repr, defdict_repr}, {Py_nb_or, defdict_or}, From c93c010184d47f85fffd995a04f953e492b1eb9d Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Fri, 20 Sep 2024 12:10:35 +0900 Subject: [PATCH 3/8] replace _PyType_GetModuleByDef2() in _decimal.c This will not be inlined, but hopefully 4% faster on Windows PGO. The forceinline version was less efficient (2% faster). --- Modules/_decimal/_decimal.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index e99a96ab93392e..5c4e2657d9c67b 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -122,6 +122,7 @@ get_module_state(PyObject *mod) } static struct PyModuleDef _decimal_module; +static PyType_Spec dec_spec; static inline decimal_state * get_module_state_by_def(PyTypeObject *tp) @@ -134,10 +135,16 @@ get_module_state_by_def(PyTypeObject *tp) static inline decimal_state * find_state_left_or_right(PyObject *left, PyObject *right) { - PyObject *mod = _PyType_GetModuleByDef2(Py_TYPE(left), Py_TYPE(right), - &_decimal_module); - assert(mod != NULL); - return get_module_state(mod); + PyTypeObject *base; + if (PyType_GetBaseByToken(Py_TYPE(left), &dec_spec, &base) != 1) { + assert(!PyErr_Occurred()); + PyType_GetBaseByToken(Py_TYPE(right), &dec_spec, &base); + } + assert(base != NULL); + void *state = _PyType_GetModuleState(base); + assert(state != NULL); + Py_DECREF(base); + return (decimal_state *)state; } @@ -745,7 +752,7 @@ signaldict_richcompare(PyObject *v, PyObject *w, int op) { PyObject *res = Py_NotImplemented; - decimal_state *state = find_state_left_or_right(v, w); + decimal_state *state = get_module_state_by_def(Py_TYPE(v)); assert(PyDecSignalDict_Check(state, v)); if ((SdFlagAddr(v) == NULL) || (SdFlagAddr(w) == NULL)) { @@ -4653,7 +4660,7 @@ dec_richcompare(PyObject *v, PyObject *w, int op) uint32_t status = 0; int a_issnan, b_issnan; int r; - decimal_state *state = find_state_left_or_right(v, w); + decimal_state *state = get_module_state_by_def(Py_TYPE(v)); #ifdef Py_DEBUG assert(PyDec_Check(state, v)); @@ -5041,6 +5048,7 @@ static PyMethodDef dec_methods [] = }; static PyType_Slot dec_slots[] = { + {Py_tp_token, Py_TP_USE_SPEC}, {Py_tp_dealloc, dec_dealloc}, {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_traverse, dec_traverse}, From 967d197467ed2cdb05635a8d61927184f979c3d3 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Fri, 20 Sep 2024 19:31:20 +0900 Subject: [PATCH 4/8] leave dec_richcompare() as is PyType_GetModuleByDef() is now slower. --- Modules/_decimal/_decimal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 5c4e2657d9c67b..68d1da9faab867 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -4660,7 +4660,7 @@ dec_richcompare(PyObject *v, PyObject *w, int op) uint32_t status = 0; int a_issnan, b_issnan; int r; - decimal_state *state = get_module_state_by_def(Py_TYPE(v)); + decimal_state *state = find_state_left_or_right(v, w); #ifdef Py_DEBUG assert(PyDec_Check(state, v)); From 8c5fc034ecd95b52709d2e8dd1b8f88ffd9dca64 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Fri, 20 Sep 2024 20:15:45 +0900 Subject: [PATCH 5/8] keep using _PyType_HasFeature() in loops --- Objects/typeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1501b825e824a9..aa95b284c29160 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5297,7 +5297,7 @@ get_base_by_token_recursive(PyTypeObject *type, void *token) Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, i)); - if (!PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { + if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { @@ -5328,7 +5328,7 @@ get_base_by_token_from_mro(PyTypeObject *type, void *token) Py_ssize_t n = PyTuple_GET_SIZE(mro); for (Py_ssize_t i = 1; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(mro, i)); - if (!PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { + if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { From af8b92c6e57115aafa0d54d94b96a4f642b0431b Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Mon, 23 Sep 2024 01:36:23 +0900 Subject: [PATCH 6/8] roll back typeobject.c to main --- Objects/typeobject.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index aa95b284c29160..68e481f8e5163b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5340,25 +5340,24 @@ get_base_by_token_from_mro(PyTypeObject *type, void *token) static int check_base_by_token(PyTypeObject *type, void *token) { + // Chain the branches, which will be optimized exclusive here if (token == NULL) { PyErr_Format(PyExc_SystemError, "PyType_GetBaseByToken called with token=NULL"); return -1; } - if (!PyType_Check(type)) { + else if (!PyType_Check(type)) { PyErr_Format(PyExc_TypeError, "expected a type, got a '%T' object", type); return -1; } - // Ensure the flag is checked by a function, not directly. PyType_Check() - // above also uses PyType_HasFeature() instead of the internal duplicate. - if (!PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + else if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { return 0; } - if (((PyHeapTypeObject*)type)->ht_token == token) { + else if (((PyHeapTypeObject*)type)->ht_token == token) { return 1; } - if (type->tp_mro != NULL) { + else if (type->tp_mro != NULL) { // This will not be inlined return get_base_by_token_from_mro(type, token) ? 1 : 0; } @@ -5375,15 +5374,14 @@ PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) // branches will become trivial to optimize. return check_base_by_token(type, token); } - PyTypeObject *base; - // Chain the branches below (not above), which will avoid being less - // pgoptimized by MSVC together with check_base_by_token() and other - // functions, unless they have if-else statements with similar conds. if (token == NULL || !PyType_Check(type)) { *result = NULL; return check_base_by_token(type, token); } - else if (!PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + + // Chain the branches, which will be optimized exclusive here + PyTypeObject *base; + if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // No static type has a heaptype superclass, // which is ensured by type_ready_mro(). *result = NULL; From 643f758385216feda06b7bed4add5af89a66c2bb Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Wed, 25 Sep 2024 06:23:35 +0900 Subject: [PATCH 7/8] remove _PyType_GetModuleByDef2 form header --- Include/internal/pycore_typeobject.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index ca5a1e2adb4787..118bc98b35d5e3 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -209,7 +209,6 @@ extern PyObject * _PyType_GetBases(PyTypeObject *type); extern PyObject * _PyType_GetMRO(PyTypeObject *type); extern PyObject* _PyType_GetSubclasses(PyTypeObject *); extern int _PyType_HasSubclasses(PyTypeObject *); -PyAPI_FUNC(PyObject *) _PyType_GetModuleByDef2(PyTypeObject *, PyTypeObject *, PyModuleDef *); // Export for _testinternalcapi extension. PyAPI_FUNC(PyObject *) _PyType_GetSlotWrapperNames(void); From a4e712a7827fa7a116d25ade7bc358452e9c5726 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Wed, 25 Sep 2024 07:17:21 +0900 Subject: [PATCH 8/8] remove _PyType_GetModuleByDef2() impl --- Objects/typeobject.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 68e481f8e5163b..3368c1ef577d14 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5207,8 +5207,8 @@ PyType_GetModuleState(PyTypeObject *type) /* Get the module of the first superclass where the module has the * given PyModuleDef. */ -static inline PyObject * -get_module_by_def(PyTypeObject *type, PyModuleDef *def) +PyObject * +PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) { assert(PyType_Check(type)); @@ -5241,7 +5241,7 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def) Py_ssize_t n = PyTuple_GET_SIZE(mro); for (Py_ssize_t i = 1; i < n; i++) { PyObject *super = PyTuple_GET_ITEM(mro, i); - if(!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { + if (!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { // Static types in the MRO need to be skipped continue; } @@ -5254,37 +5254,14 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def) } } END_TYPE_LOCK(); - return res; -} -PyObject * -PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) -{ - PyObject *module = get_module_by_def(type, def); - if (module == NULL) { + if (res == NULL) { PyErr_Format( PyExc_TypeError, "PyType_GetModuleByDef: No superclass of '%s' has the given module", type->tp_name); } - return module; -} - -PyObject * -_PyType_GetModuleByDef2(PyTypeObject *left, PyTypeObject *right, - PyModuleDef *def) -{ - PyObject *module = get_module_by_def(left, def); - if (module == NULL) { - module = get_module_by_def(right, def); - if (module == NULL) { - PyErr_Format( - PyExc_TypeError, - "PyType_GetModuleByDef: No superclass of '%s' nor '%s' has " - "the given module", left->tp_name, right->tp_name); - } - } - return module; + return res; }