Skip to content

Commit 6dbe49b

Browse files
mpageebonnal
authored andcommitted
pythongh-115999: Specialize LOAD_GLOBAL in free-threaded builds (python#126607)
Enable specialization of LOAD_GLOBAL in free-threaded builds. Thread-safety of specialization in free-threaded builds is provided by the following: A critical section is held on both the globals and builtins objects during specialization. This ensures we get an atomic view of both builtins and globals during specialization. Generation of new keys versions is made atomic in free-threaded builds. Existing helpers are used to atomically modify the opcode. Thread-safety of specialized instructions in free-threaded builds is provided by the following: Relaxed atomics are used when loading and storing dict keys versions. This avoids potential data races as the dict keys versions are read without holding the dictionary's per-object lock in version guards. Dicts keys objects are passed from keys version guards to the downstream uops. This ensures that we are loading from the correct offset in the keys object. Once a unicode key has been stored in a keys object for a combined dictionary in free-threaded builds, the offset that it is stored in will never be reused for a different key. Once the version guard passes, we know that we are reading from the correct offset. The dictionary read fast-path is used to read values from the dictionary once we know the correct offset.
1 parent 6beecf6 commit 6dbe49b

File tree

11 files changed

+222
-69
lines changed

11 files changed

+222
-69
lines changed

Include/internal/pycore_dict.h

+11
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,17 @@ extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *);
9090
extern uint32_t _PyDictKeys_GetVersionForCurrentState(
9191
PyInterpreterState *interp, PyDictKeysObject *dictkeys);
9292

93+
/* Gets a version number unique to the current state of the keys of dict, if possible.
94+
*
95+
* In free-threaded builds ensures that the dict can be used for lock-free
96+
* reads if a version was assigned.
97+
*
98+
* The caller must hold the per-object lock on dict.
99+
*
100+
* Returns the version number, or zero if it was not possible to get a version number. */
101+
extern uint32_t _PyDict_GetKeysVersionForCurrentState(
102+
PyInterpreterState *interp, PyDictObject *dict);
103+
93104
extern size_t _PyDict_KeysSize(PyDictKeysObject *keys);
94105

95106
extern void _PyDictKeys_DecRef(PyDictKeysObject *keys);

Include/internal/pycore_object.h

+15
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ extern "C" {
1414
#include "pycore_interp.h" // PyInterpreterState.gc
1515
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
1616
#include "pycore_pystate.h" // _PyInterpreterState_GET()
17+
#include "pycore_stackref.h"
1718
#include "pycore_uniqueid.h" // _PyObject_ThreadIncrefSlow()
1819

1920
// This value is added to `ob_ref_shared` for objects that use deferred
@@ -595,6 +596,20 @@ _Py_TryIncrefCompare(PyObject **src, PyObject *op)
595596
return 1;
596597
}
597598

599+
static inline int
600+
_Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out)
601+
{
602+
if (_Py_IsImmortal(op) || _PyObject_HasDeferredRefcount(op)) {
603+
*out = (_PyStackRef){ .bits = (intptr_t)op | Py_TAG_DEFERRED };
604+
return 1;
605+
}
606+
if (_Py_TryIncrefCompare(src, op)) {
607+
*out = PyStackRef_FromPyObjectSteal(op);
608+
return 1;
609+
}
610+
return 0;
611+
}
612+
598613
/* Loads and increfs an object from ptr, which may contain a NULL value.
599614
Safe with concurrent (atomic) updates to ptr.
600615
NOTE: The writer must set maybe-weakref on the stored object! */

Lib/test/test_opcache.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,6 @@ def count_args(self, *args):
546546

547547

548548
@threading_helper.requires_working_threading()
549-
@requires_specialization
550549
class TestRacesDoNotCrash(TestBase):
551550
# Careful with these. Bigger numbers have a higher chance of catching bugs,
552551
# but you can also burn through a *ton* of type/dict/function versions:
@@ -588,6 +587,7 @@ def assert_races_do_not_crash(
588587
for writer in writers:
589588
writer.join()
590589

590+
@requires_specialization
591591
def test_binary_subscr_getitem(self):
592592
def get_items():
593593
class C:
@@ -617,6 +617,7 @@ def write(items):
617617
opname = "BINARY_SUBSCR_GETITEM"
618618
self.assert_races_do_not_crash(opname, get_items, read, write)
619619

620+
@requires_specialization
620621
def test_binary_subscr_list_int(self):
621622
def get_items():
622623
items = []
@@ -640,6 +641,7 @@ def write(items):
640641
opname = "BINARY_SUBSCR_LIST_INT"
641642
self.assert_races_do_not_crash(opname, get_items, read, write)
642643

644+
@requires_specialization
643645
def test_for_iter_gen(self):
644646
def get_items():
645647
def g():
@@ -671,6 +673,7 @@ def write(items):
671673
opname = "FOR_ITER_GEN"
672674
self.assert_races_do_not_crash(opname, get_items, read, write)
673675

676+
@requires_specialization
674677
def test_for_iter_list(self):
675678
def get_items():
676679
items = []
@@ -692,6 +695,7 @@ def write(items):
692695
opname = "FOR_ITER_LIST"
693696
self.assert_races_do_not_crash(opname, get_items, read, write)
694697

698+
@requires_specialization
695699
def test_load_attr_class(self):
696700
def get_items():
697701
class C:
@@ -721,6 +725,7 @@ def write(items):
721725
opname = "LOAD_ATTR_CLASS"
722726
self.assert_races_do_not_crash(opname, get_items, read, write)
723727

728+
@requires_specialization
724729
def test_load_attr_getattribute_overridden(self):
725730
def get_items():
726731
class C:
@@ -750,6 +755,7 @@ def write(items):
750755
opname = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN"
751756
self.assert_races_do_not_crash(opname, get_items, read, write)
752757

758+
@requires_specialization
753759
def test_load_attr_instance_value(self):
754760
def get_items():
755761
class C:
@@ -773,6 +779,7 @@ def write(items):
773779
opname = "LOAD_ATTR_INSTANCE_VALUE"
774780
self.assert_races_do_not_crash(opname, get_items, read, write)
775781

782+
@requires_specialization
776783
def test_load_attr_method_lazy_dict(self):
777784
def get_items():
778785
class C(Exception):
@@ -802,6 +809,7 @@ def write(items):
802809
opname = "LOAD_ATTR_METHOD_LAZY_DICT"
803810
self.assert_races_do_not_crash(opname, get_items, read, write)
804811

812+
@requires_specialization
805813
def test_load_attr_method_no_dict(self):
806814
def get_items():
807815
class C:
@@ -832,6 +840,7 @@ def write(items):
832840
opname = "LOAD_ATTR_METHOD_NO_DICT"
833841
self.assert_races_do_not_crash(opname, get_items, read, write)
834842

843+
@requires_specialization
835844
def test_load_attr_method_with_values(self):
836845
def get_items():
837846
class C:
@@ -861,6 +870,7 @@ def write(items):
861870
opname = "LOAD_ATTR_METHOD_WITH_VALUES"
862871
self.assert_races_do_not_crash(opname, get_items, read, write)
863872

873+
@requires_specialization
864874
def test_load_attr_module(self):
865875
def get_items():
866876
items = []
@@ -885,6 +895,7 @@ def write(items):
885895
opname = "LOAD_ATTR_MODULE"
886896
self.assert_races_do_not_crash(opname, get_items, read, write)
887897

898+
@requires_specialization
888899
def test_load_attr_property(self):
889900
def get_items():
890901
class C:
@@ -914,6 +925,7 @@ def write(items):
914925
opname = "LOAD_ATTR_PROPERTY"
915926
self.assert_races_do_not_crash(opname, get_items, read, write)
916927

928+
@requires_specialization
917929
def test_load_attr_with_hint(self):
918930
def get_items():
919931
class C:
@@ -940,6 +952,7 @@ def write(items):
940952
opname = "LOAD_ATTR_WITH_HINT"
941953
self.assert_races_do_not_crash(opname, get_items, read, write)
942954

955+
@requires_specialization_ft
943956
def test_load_global_module(self):
944957
def get_items():
945958
items = []
@@ -961,6 +974,7 @@ def write(items):
961974
opname, get_items, read, write, check_items=True
962975
)
963976

977+
@requires_specialization
964978
def test_store_attr_instance_value(self):
965979
def get_items():
966980
class C:
@@ -983,6 +997,7 @@ def write(items):
983997
opname = "STORE_ATTR_INSTANCE_VALUE"
984998
self.assert_races_do_not_crash(opname, get_items, read, write)
985999

1000+
@requires_specialization
9861001
def test_store_attr_with_hint(self):
9871002
def get_items():
9881003
class C:
@@ -1008,6 +1023,7 @@ def write(items):
10081023
opname = "STORE_ATTR_WITH_HINT"
10091024
self.assert_races_do_not_crash(opname, get_items, read, write)
10101025

1026+
@requires_specialization
10111027
def test_store_subscr_list_int(self):
10121028
def get_items():
10131029
items = []
@@ -1031,6 +1047,7 @@ def write(items):
10311047
opname = "STORE_SUBSCR_LIST_INT"
10321048
self.assert_races_do_not_crash(opname, get_items, read, write)
10331049

1050+
@requires_specialization
10341051
def test_unpack_sequence_list(self):
10351052
def get_items():
10361053
items = []

Objects/dictobject.c

+58-10
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,20 @@ ensure_shared_on_resize(PyDictObject *mp)
12851285
#endif
12861286
}
12871287

1288+
static inline void
1289+
ensure_shared_on_keys_version_assignment(PyDictObject *mp)
1290+
{
1291+
ASSERT_DICT_LOCKED((PyObject *) mp);
1292+
#ifdef Py_GIL_DISABLED
1293+
if (!IS_DICT_SHARED(mp)) {
1294+
// This ensures that a concurrent resize operation will delay
1295+
// freeing the old keys or values using QSBR, which is necessary to
1296+
// safely allow concurrent reads without locking.
1297+
SET_DICT_SHARED(mp);
1298+
}
1299+
#endif
1300+
}
1301+
12881302
#ifdef Py_GIL_DISABLED
12891303

12901304
static inline Py_ALWAYS_INLINE int
@@ -1644,7 +1658,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp,
16441658
}
16451659

16461660
_PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value);
1647-
mp->ma_keys->dk_version = 0;
1661+
FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0);
16481662

16491663
Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
16501664
dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
@@ -1686,7 +1700,7 @@ insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash)
16861700
ix = unicodekeys_lookup_unicode(keys, key, hash);
16871701
if (ix == DKIX_EMPTY && keys->dk_usable > 0) {
16881702
// Insert into new slot
1689-
keys->dk_version = 0;
1703+
FT_ATOMIC_STORE_UINT32_RELAXED(keys->dk_version, 0);
16901704
Py_ssize_t hashpos = find_empty_slot(keys, hash);
16911705
ix = keys->dk_nentries;
16921706
dictkeys_set_index(keys, hashpos, ix);
@@ -2617,7 +2631,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
26172631
ASSERT_CONSISTENT(mp);
26182632
}
26192633
else {
2620-
mp->ma_keys->dk_version = 0;
2634+
FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0);
26212635
dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
26222636
if (DK_IS_UNICODE(mp->ma_keys)) {
26232637
PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix];
@@ -4429,7 +4443,7 @@ dict_popitem_impl(PyDictObject *self)
44294443
return NULL;
44304444
}
44314445
}
4432-
self->ma_keys->dk_version = 0;
4446+
FT_ATOMIC_STORE_UINT32_RELAXED(self->ma_keys->dk_version, 0);
44334447

44344448
/* Pop last item */
44354449
PyObject *key, *value;
@@ -7417,20 +7431,54 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)
74177431
dictkeys_decref(interp, keys, false);
74187432
}
74197433

7420-
uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp,
7421-
PyDictKeysObject *dictkeys)
7434+
static inline uint32_t
7435+
get_next_dict_keys_version(PyInterpreterState *interp)
74227436
{
7423-
if (dictkeys->dk_version != 0) {
7424-
return dictkeys->dk_version;
7425-
}
7437+
#ifdef Py_GIL_DISABLED
7438+
uint32_t v;
7439+
do {
7440+
v = _Py_atomic_load_uint32_relaxed(
7441+
&interp->dict_state.next_keys_version);
7442+
if (v == 0) {
7443+
return 0;
7444+
}
7445+
} while (!_Py_atomic_compare_exchange_uint32(
7446+
&interp->dict_state.next_keys_version, &v, v + 1));
7447+
#else
74267448
if (interp->dict_state.next_keys_version == 0) {
74277449
return 0;
74287450
}
74297451
uint32_t v = interp->dict_state.next_keys_version++;
7430-
dictkeys->dk_version = v;
7452+
#endif
74317453
return v;
74327454
}
74337455

7456+
// In free-threaded builds the caller must ensure that the keys object is not
7457+
// being mutated concurrently by another thread.
7458+
uint32_t
7459+
_PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp,
7460+
PyDictKeysObject *dictkeys)
7461+
{
7462+
uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version);
7463+
if (dk_version != 0) {
7464+
return dk_version;
7465+
}
7466+
dk_version = get_next_dict_keys_version(interp);
7467+
FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version);
7468+
return dk_version;
7469+
}
7470+
7471+
uint32_t
7472+
_PyDict_GetKeysVersionForCurrentState(PyInterpreterState *interp,
7473+
PyDictObject *dict)
7474+
{
7475+
ASSERT_DICT_LOCKED((PyObject *) dict);
7476+
uint32_t dk_version =
7477+
_PyDictKeys_GetVersionForCurrentState(interp, dict->ma_keys);
7478+
ensure_shared_on_keys_version_assignment(dict);
7479+
return dk_version;
7480+
}
7481+
74347482
static inline int
74357483
validate_watcher_id(PyInterpreterState *interp, int watcher_id)
74367484
{

Objects/funcobject.c

+2
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,14 @@ functions is running.
289289
290290
*/
291291

292+
#ifndef Py_GIL_DISABLED
292293
static inline struct _func_version_cache_item *
293294
get_cache_item(PyInterpreterState *interp, uint32_t version)
294295
{
295296
return interp->func_state.func_version_cache +
296297
(version % FUNC_VERSION_CACHE_SIZE);
297298
}
299+
#endif
298300

299301
void
300302
_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)

0 commit comments

Comments
 (0)