Skip to content

Commit be60e30

Browse files
pythongh-122417: Implement per-thread heap type refcounts
The free-threaded build partially stores heap type reference counts in a distributed manner in per-thread arrays. This avoids reference count contention when creating or destroying instances. Co-authored-by: Ken Jin <kenjin@python.org>
1 parent 490e0ad commit be60e30

13 files changed

+413
-71
lines changed

Include/cpython/object.h

+3
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
270270
PyObject *ht_module;
271271
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
272272
struct _specialization_cache _spec_cache; // For use by the specializer.
273+
#ifdef Py_GIL_DISABLED
274+
Py_ssize_t _ht_id; // ID used for thread-local refcounting
275+
#endif
273276
/* here are optional user slots, followed by the members. */
274277
} PyHeapTypeObject;
275278

Include/internal/pycore_gc.h

-4
Original file line numberDiff line numberDiff line change
@@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
381381
extern void _Py_ScheduleGC(PyThreadState *tstate);
382382
extern void _Py_RunGC(PyThreadState *tstate);
383383

384-
#ifdef Py_GIL_DISABLED
385-
// gh-117783: Immortalize objects that use deferred reference counting
386-
extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
387-
#endif
388384

389385
#ifdef __cplusplus
390386
}

Include/internal/pycore_interp.h

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ extern "C" {
3535
#include "pycore_qsbr.h" // struct _qsbr_state
3636
#include "pycore_tstate.h" // _PyThreadStateImpl
3737
#include "pycore_tuple.h" // struct _Py_tuple_state
38+
#include "pycore_typeid.h" // struct _Py_type_id_pool
3839
#include "pycore_typeobject.h" // struct types_state
3940
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
4041
#include "pycore_warnings.h" // struct _warnings_runtime_state
@@ -220,6 +221,7 @@ struct _is {
220221
#if defined(Py_GIL_DISABLED)
221222
struct _mimalloc_interp_state mimalloc;
222223
struct _brc_state brc; // biased reference counting state
224+
struct _Py_type_id_pool type_ids;
223225
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
224226
#endif
225227

Include/internal/pycore_object.h

+66-1
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,19 @@ extern "C" {
1414
#include "pycore_interp.h" // PyInterpreterState.gc
1515
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
1616
#include "pycore_pystate.h" // _PyInterpreterState_GET()
17+
#include "pycore_typeid.h" // _PyType_IncrefSlow
1718

1819

1920
#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)
2021

22+
// This value is added to `ob_ref_shared` for objects that use deferred
23+
// reference counting so that they are not immediately deallocated when the
24+
// non-deferred reference count drops to zero.
25+
//
26+
// The low two bits of `ob_ref_shared` are used for flags, so the maximum
26+
// shared refcount is PY_SSIZE_T_MAX / 4; this value is half of that maximum.
28+
#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)
29+
2130
// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
2231
// comparing the reference count to stay compatible with C extensions built
2332
// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
@@ -280,6 +289,62 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
280289
extern void _PyObject_FiniState(PyInterpreterState *interp);
281290
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
282291

292+
static inline void
293+
_Py_INCREF_TYPE(PyTypeObject *type)
294+
{
295+
#ifndef Py_GIL_DISABLED
296+
Py_INCREF(type);
297+
#else
298+
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
299+
assert(_Py_IsImmortal(type));
300+
return;
301+
}
302+
303+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
304+
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
305+
306+
Py_ssize_t ht_id = ht->_ht_id;
307+
if ((size_t)ht_id >= (size_t)tstate->types.size) {
308+
_PyType_IncrefSlow(ht);
309+
}
310+
else {
311+
# ifdef Py_REF_DEBUG
312+
_Py_INCREF_IncRefTotal();
313+
# endif
314+
_Py_INCREF_STAT_INC();
315+
tstate->types.refcounts[ht_id]++;
316+
}
317+
#endif
318+
}
319+
320+
static inline void
321+
_Py_DECREF_TYPE(PyTypeObject *type)
322+
{
323+
#ifndef Py_GIL_DISABLED
324+
Py_DECREF(type);
325+
#else
326+
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
327+
assert(_Py_IsImmortal(type));
328+
return;
329+
}
330+
331+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
332+
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
333+
334+
Py_ssize_t ht_id = ht->_ht_id;
335+
if ((size_t)ht_id >= (size_t)tstate->types.size) {
336+
Py_DECREF(type);
337+
}
338+
else {
339+
# ifdef Py_REF_DEBUG
340+
_Py_DECREF_DecRefTotal();
341+
# endif
342+
_Py_DECREF_STAT_INC();
343+
tstate->types.refcounts[ht_id]--;
344+
}
345+
#endif
346+
}
347+
283348
/* Inline functions trading binary compatibility for speed:
284349
_PyObject_Init() is the fast version of PyObject_Init(), and
285350
_PyObject_InitVar() is the fast version of PyObject_InitVar().
@@ -291,7 +356,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
291356
assert(op != NULL);
292357
Py_SET_TYPE(op, typeobj);
293358
assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
294-
Py_INCREF(typeobj);
359+
_Py_INCREF_TYPE(typeobj);
295360
_Py_NewReference(op);
296361
}
297362

Include/internal/pycore_tstate.h

+10
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
3131
struct _mimalloc_thread_state mimalloc;
3232
struct _Py_freelists freelists;
3333
struct _brc_thread_state brc;
34+
struct {
35+
// The thread-local refcounts for heap type objects
36+
Py_ssize_t *refcounts;
37+
38+
// Size of the refcounts array.
39+
Py_ssize_t size;
40+
41+
// If set, don't use thread-local refcounts
42+
int is_finalized;
43+
} types;
3444
#endif
3545

3646
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)

Include/internal/pycore_typeid.h

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#ifndef Py_INTERNAL_TYPEID_H
2+
#define Py_INTERNAL_TYPEID_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
#ifndef Py_BUILD_CORE
8+
# error "this header requires Py_BUILD_CORE define"
9+
#endif
10+
11+
#ifdef Py_GIL_DISABLED
12+
13+
// This contains code for allocating unique ids to heap type objects
14+
// and re-using those ids when the type is deallocated.
15+
//
16+
// The type ids are used to implement per-thread reference counts of
17+
// heap type objects to avoid contention on the reference count fields
18+
// of heap type objects. (Non-heap type objects are immortal, so contention
19+
// is not an issue.)
20+
//
21+
// Type id of -1 is used to indicate a type doesn't use thread-local
22+
// refcounting.
23+
//
24+
// Each entry implicitly represents a type id based on its offset in the
25+
// table. Non-allocated entries form a free-list via the 'next' pointer.
26+
// Allocated entries store the corresponding PyTypeObject.
27+
typedef union _Py_type_id_entry {
28+
// Points to the next free type id, when part of the freelist
29+
union _Py_type_id_entry *next;
30+
31+
// Stores the type object when the id is assigned
32+
PyHeapTypeObject *type;
33+
} _Py_type_id_entry;
34+
35+
struct _Py_type_id_pool {
36+
PyMutex mutex;
37+
38+
// combined table of types with allocated type ids and unallocated
39+
// type ids.
40+
_Py_type_id_entry *table;
41+
42+
// Next entry to allocate inside 'table' or NULL
43+
_Py_type_id_entry *freelist;
44+
45+
// size of 'table'
46+
Py_ssize_t size;
47+
};
48+
49+
// Assigns the next id from the pool of type ids.
50+
extern void _PyType_AssignId(PyHeapTypeObject *type);
51+
52+
// Releases the allocated type id back to the pool.
53+
extern void _PyType_ReleaseId(PyHeapTypeObject *type);
54+
55+
// Merges the thread-local reference counts into the corresponding types.
56+
extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
57+
58+
// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
59+
// array of refcounts.
60+
extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
61+
62+
// Frees the interpreter's pool of type ids.
63+
extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);
64+
65+
// Increfs the type, resizing the thread-local refcount array if necessary.
66+
PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);
67+
68+
#endif /* Py_GIL_DISABLED */
69+
70+
#ifdef __cplusplus
71+
}
72+
#endif
73+
#endif /* !Py_INTERNAL_TYPEID_H */

Lib/test/test_sys.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,7 @@ def delx(self): del self.__x
17101710
fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
17111711
s = vsize(fmt)
17121712
check(int, s)
1713+
typeid = 'n' if support.Py_GIL_DISABLED else ''
17131714
# class
17141715
s = vsize(fmt + # PyTypeObject
17151716
'4P' # PyAsyncMethods
@@ -1718,7 +1719,8 @@ def delx(self): del self.__x
17181719
'10P' # PySequenceMethods
17191720
'2P' # PyBufferProcs
17201721
'6P'
1721-
'1PIP' # Specializer cache
1722+
'1PIP' # Specializer cache
1723+
+ typeid # heap type id (free-threaded only)
17221724
)
17231725
class newstyleclass(object): pass
17241726
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Makefile.pre.in

+2
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ PYTHON_OBJS= \
480480
Python/structmember.o \
481481
Python/symtable.o \
482482
Python/sysmodule.o \
483+
Python/typeid.o \
483484
Python/thread.o \
484485
Python/traceback.o \
485486
Python/tracemalloc.o \
@@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
12571258
$(srcdir)/Include/internal/pycore_tracemalloc.h \
12581259
$(srcdir)/Include/internal/pycore_tstate.h \
12591260
$(srcdir)/Include/internal/pycore_tuple.h \
1261+
$(srcdir)/Include/internal/pycore_typeid.h \
12601262
$(srcdir)/Include/internal/pycore_typeobject.h \
12611263
$(srcdir)/Include/internal/pycore_typevarobject.h \
12621264
$(srcdir)/Include/internal/pycore_ucnhash.h \

Objects/object.c

+1-9
Original file line numberDiff line numberDiff line change
@@ -2470,15 +2470,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
24702470
assert(_Py_IsOwnedByCurrentThread(op));
24712471
assert(op->ob_ref_shared == 0);
24722472
_PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
2473-
PyInterpreterState *interp = _PyInterpreterState_GET();
2474-
if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
2475-
// gh-117696: immortalize objects instead of using deferred reference
2476-
// counting for now.
2477-
_Py_SetImmortal(op);
2478-
return;
2479-
}
2480-
op->ob_ref_local += 1;
2481-
op->ob_ref_shared = _Py_REF_QUEUED;
2473+
op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
24822474
#endif
24832475
}
24842476

Objects/typeobject.c

+17-5
Original file line numberDiff line numberDiff line change
@@ -2439,7 +2439,8 @@ subtype_dealloc(PyObject *self)
24392439
// Don't read type memory after calling basedealloc() since basedealloc()
24402440
// can deallocate the type and free its memory.
24412441
int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE
2442-
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE));
2442+
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE)
2443+
&& !_Py_IsImmortal(type));
24432444

24442445
assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);
24452446

@@ -2452,7 +2453,7 @@ subtype_dealloc(PyObject *self)
24522453
reference counting. Only decref if the base type is not already a heap
24532454
allocated type. Otherwise, basedealloc should have decref'd it already */
24542455
if (type_needs_decref) {
2455-
Py_DECREF(type);
2456+
_Py_DECREF_TYPE(type);
24562457
}
24572458

24582459
/* Done */
@@ -2552,7 +2553,8 @@ subtype_dealloc(PyObject *self)
25522553
// Don't read type memory after calling basedealloc() since basedealloc()
25532554
// can deallocate the type and free its memory.
25542555
int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE
2555-
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE));
2556+
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE)
2557+
&& !(_Py_IsImmortal(type)));
25562558

25572559
assert(basedealloc);
25582560
basedealloc(self);
@@ -2562,7 +2564,7 @@ subtype_dealloc(PyObject *self)
25622564
reference counting. Only decref if the base type is not already a heap
25632565
allocated type. Otherwise, basedealloc should have decref'd it already */
25642566
if (type_needs_decref) {
2565-
Py_DECREF(type);
2567+
_Py_DECREF_TYPE(type);
25662568
}
25672569

25682570
endlabel:
@@ -3913,7 +3915,9 @@ type_new_alloc(type_new_ctx *ctx)
39133915
et->ht_module = NULL;
39143916
et->_ht_tpname = NULL;
39153917

3916-
_PyObject_SetDeferredRefcount((PyObject *)et);
3918+
#ifdef Py_GIL_DISABLED
3919+
_PyType_AssignId(et);
3920+
#endif
39173921

39183922
return type;
39193923
}
@@ -4965,6 +4969,11 @@ _PyType_FromMetaclass_impl(
49654969
type->tp_weaklistoffset = weaklistoffset;
49664970
type->tp_dictoffset = dictoffset;
49674971

4972+
#ifdef Py_GIL_DISABLED
4973+
// Assign a type id to enable thread-local refcounting
4974+
_PyType_AssignId(res);
4975+
#endif
4976+
49684977
/* Ready the type (which includes inheritance).
49694978
*
49704979
* After this call we should generally only touch up what's
@@ -5914,6 +5923,9 @@ type_dealloc(PyObject *self)
59145923
}
59155924
Py_XDECREF(et->ht_module);
59165925
PyMem_Free(et->_ht_tpname);
5926+
#ifdef Py_GIL_DISABLED
5927+
_PyType_ReleaseId(et);
5928+
#endif
59175929
Py_TYPE(type)->tp_free((PyObject *)type);
59185930
}
59195931

0 commit comments

Comments
 (0)