Skip to content

Commit 294e724

Browse files
authored
gh-117657: Fix data races reported by TSAN in some set methods (#120914)
Refactor the fast Unicode hash check into `_PyObject_HashFast` and use relaxed atomic loads in the free-threaded build. After this change, the TSAN doesn't report data races for this method.
1 parent 9bcb7d8 commit 294e724

File tree

6 files changed

+81
-121
lines changed

6 files changed

+81
-121
lines changed

Include/internal/pycore_object.h

+14
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,20 @@ _PyObject_IS_GC(PyObject *obj)
613613
&& (type->tp_is_gc == NULL || type->tp_is_gc(obj)));
614614
}
615615

616+
// Fast inlined version of PyObject_Hash()
617+
static inline Py_hash_t
618+
_PyObject_HashFast(PyObject *op)
619+
{
620+
if (PyUnicode_CheckExact(op)) {
621+
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(
622+
_PyASCIIObject_CAST(op)->hash);
623+
if (hash != -1) {
624+
return hash;
625+
}
626+
}
627+
return PyObject_Hash(op);
628+
}
629+
616630
// Fast inlined version of PyType_IS_GC()
617631
#define _PyType_IS_GC(t) _PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC)
618632

Modules/_collectionsmodule.c

+3-6
Original file line numberDiff line numberDiff line change
@@ -2537,12 +2537,9 @@ _collections__count_elements_impl(PyObject *module, PyObject *mapping,
25372537
if (key == NULL)
25382538
break;
25392539

2540-
if (!PyUnicode_CheckExact(key) ||
2541-
(hash = _PyASCIIObject_CAST(key)->hash) == -1)
2542-
{
2543-
hash = PyObject_Hash(key);
2544-
if (hash == -1)
2545-
goto done;
2540+
hash = _PyObject_HashFast(key);
2541+
if (hash == -1) {
2542+
goto done;
25462543
}
25472544

25482545
oldval = _PyDict_GetItem_KnownHash(mapping, key, hash);

Objects/dictobject.c

+51-84
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ static inline Py_hash_t
433433
unicode_get_hash(PyObject *o)
434434
{
435435
assert(PyUnicode_CheckExact(o));
436-
return _PyASCIIObject_CAST(o)->hash;
436+
return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(o)->hash);
437437
}
438438

439439
/* Print summary info about the state of the optimized allocator */
@@ -2177,13 +2177,10 @@ dict_getitem(PyObject *op, PyObject *key, const char *warnmsg)
21772177
}
21782178
PyDictObject *mp = (PyDictObject *)op;
21792179

2180-
Py_hash_t hash;
2181-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2182-
hash = PyObject_Hash(key);
2183-
if (hash == -1) {
2184-
PyErr_FormatUnraisable(warnmsg);
2185-
return NULL;
2186-
}
2180+
Py_hash_t hash = _PyObject_HashFast(key);
2181+
if (hash == -1) {
2182+
PyErr_FormatUnraisable(warnmsg);
2183+
return NULL;
21872184
}
21882185

21892186
PyThreadState *tstate = _PyThreadState_GET();
@@ -2232,12 +2229,9 @@ _PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
22322229
assert(PyDict_CheckExact((PyObject*)mp));
22332230
assert(PyUnicode_CheckExact(key));
22342231

2235-
Py_hash_t hash = unicode_get_hash(key);
2232+
Py_hash_t hash = _PyObject_HashFast(key);
22362233
if (hash == -1) {
2237-
hash = PyObject_Hash(key);
2238-
if (hash == -1) {
2239-
return -1;
2240-
}
2234+
return -1;
22412235
}
22422236

22432237
return _Py_dict_lookup(mp, key, hash, &value);
@@ -2308,14 +2302,10 @@ PyDict_GetItemRef(PyObject *op, PyObject *key, PyObject **result)
23082302
return -1;
23092303
}
23102304

2311-
Py_hash_t hash;
2312-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1)
2313-
{
2314-
hash = PyObject_Hash(key);
2315-
if (hash == -1) {
2316-
*result = NULL;
2317-
return -1;
2318-
}
2305+
Py_hash_t hash = _PyObject_HashFast(key);
2306+
if (hash == -1) {
2307+
*result = NULL;
2308+
return -1;
23192309
}
23202310

23212311
return _PyDict_GetItemRef_KnownHash((PyDictObject *)op, key, hash, result);
@@ -2327,13 +2317,10 @@ _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject **
23272317
ASSERT_DICT_LOCKED(op);
23282318
assert(PyUnicode_CheckExact(key));
23292319

2330-
Py_hash_t hash;
2331-
if ((hash = unicode_get_hash(key)) == -1) {
2332-
hash = PyObject_Hash(key);
2333-
if (hash == -1) {
2334-
*result = NULL;
2335-
return -1;
2336-
}
2320+
Py_hash_t hash = _PyObject_HashFast(key);
2321+
if (hash == -1) {
2322+
*result = NULL;
2323+
return -1;
23372324
}
23382325

23392326
PyObject *value;
@@ -2367,12 +2354,9 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key)
23672354
PyErr_BadInternalCall();
23682355
return NULL;
23692356
}
2370-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1)
2371-
{
2372-
hash = PyObject_Hash(key);
2373-
if (hash == -1) {
2374-
return NULL;
2375-
}
2357+
hash = _PyObject_HashFast(key);
2358+
if (hash == -1) {
2359+
return NULL;
23762360
}
23772361

23782362
#ifdef Py_GIL_DISABLED
@@ -2440,10 +2424,9 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
24402424
Py_hash_t hash;
24412425
PyObject *value;
24422426

2443-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2444-
hash = PyObject_Hash(key);
2445-
if (hash == -1)
2446-
return NULL;
2427+
hash = _PyObject_HashFast(key);
2428+
if (hash == -1) {
2429+
return NULL;
24472430
}
24482431

24492432
/* namespace 1: globals */
@@ -2468,14 +2451,11 @@ setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value)
24682451
assert(key);
24692452
assert(value);
24702453
assert(PyDict_Check(mp));
2471-
Py_hash_t hash;
2472-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2473-
hash = PyObject_Hash(key);
2474-
if (hash == -1) {
2475-
Py_DECREF(key);
2476-
Py_DECREF(value);
2477-
return -1;
2478-
}
2454+
Py_hash_t hash = _PyObject_HashFast(key);
2455+
if (hash == -1) {
2456+
Py_DECREF(key);
2457+
Py_DECREF(value);
2458+
return -1;
24792459
}
24802460

24812461
PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -2624,12 +2604,10 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
26242604
int
26252605
PyDict_DelItem(PyObject *op, PyObject *key)
26262606
{
2627-
Py_hash_t hash;
26282607
assert(key);
2629-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2630-
hash = PyObject_Hash(key);
2631-
if (hash == -1)
2632-
return -1;
2608+
Py_hash_t hash = _PyObject_HashFast(key);
2609+
if (hash == -1) {
2610+
return -1;
26332611
}
26342612

26352613
return _PyDict_DelItem_KnownHash(op, key, hash);
@@ -2953,15 +2931,12 @@ pop_lock_held(PyObject *op, PyObject *key, PyObject **result)
29532931
return 0;
29542932
}
29552933

2956-
Py_hash_t hash;
2957-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2958-
hash = PyObject_Hash(key);
2959-
if (hash == -1) {
2960-
if (result) {
2961-
*result = NULL;
2962-
}
2963-
return -1;
2934+
Py_hash_t hash = _PyObject_HashFast(key);
2935+
if (hash == -1) {
2936+
if (result) {
2937+
*result = NULL;
29642938
}
2939+
return -1;
29652940
}
29662941
return _PyDict_Pop_KnownHash(dict, key, hash, result);
29672942
}
@@ -3293,10 +3268,9 @@ dict_subscript(PyObject *self, PyObject *key)
32933268
Py_hash_t hash;
32943269
PyObject *value;
32953270

3296-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
3297-
hash = PyObject_Hash(key);
3298-
if (hash == -1)
3299-
return NULL;
3271+
hash = _PyObject_HashFast(key);
3272+
if (hash == -1) {
3273+
return NULL;
33003274
}
33013275
ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value);
33023276
if (ix == DKIX_ERROR)
@@ -4183,10 +4157,9 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
41834157
Py_hash_t hash;
41844158
Py_ssize_t ix;
41854159

4186-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4187-
hash = PyObject_Hash(key);
4188-
if (hash == -1)
4189-
return NULL;
4160+
hash = _PyObject_HashFast(key);
4161+
if (hash == -1) {
4162+
return NULL;
41904163
}
41914164
ix = _Py_dict_lookup_threadsafe(self, key, hash, &val);
41924165
if (ix == DKIX_ERROR)
@@ -4216,14 +4189,12 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
42164189
return -1;
42174190
}
42184191

4219-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4220-
hash = PyObject_Hash(key);
4221-
if (hash == -1) {
4222-
if (result) {
4223-
*result = NULL;
4224-
}
4225-
return -1;
4192+
hash = _PyObject_HashFast(key);
4193+
if (hash == -1) {
4194+
if (result) {
4195+
*result = NULL;
42264196
}
4197+
return -1;
42274198
}
42284199

42294200
if (mp->ma_keys == Py_EMPTY_KEYS) {
@@ -4655,12 +4626,10 @@ static PyMethodDef mapp_methods[] = {
46554626
int
46564627
PyDict_Contains(PyObject *op, PyObject *key)
46574628
{
4658-
Py_hash_t hash;
4629+
Py_hash_t hash = _PyObject_HashFast(key);
46594630

4660-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4661-
hash = PyObject_Hash(key);
4662-
if (hash == -1)
4663-
return -1;
4631+
if (hash == -1) {
4632+
return -1;
46644633
}
46654634

46664635
return _PyDict_Contains_KnownHash(op, key, hash);
@@ -6743,11 +6712,9 @@ int
67436712
_PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value)
67446713
{
67456714
if (value == NULL) {
6746-
Py_hash_t hash;
6747-
if (!PyUnicode_CheckExact(name) || (hash = unicode_get_hash(name)) == -1) {
6748-
hash = PyObject_Hash(name);
6749-
if (hash == -1)
6750-
return -1;
6715+
Py_hash_t hash = _PyObject_HashFast(name);
6716+
if (hash == -1) {
6717+
return -1;
67516718
}
67526719
return delitem_knownhash_lock_held((PyObject *)dict, name, hash);
67536720
} else {

Objects/setobject.c

+9-21
Original file line numberDiff line numberDiff line change
@@ -365,41 +365,29 @@ set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
365365
static int
366366
set_add_key(PySetObject *so, PyObject *key)
367367
{
368-
Py_hash_t hash;
369-
370-
if (!PyUnicode_CheckExact(key) ||
371-
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
372-
hash = PyObject_Hash(key);
373-
if (hash == -1)
374-
return -1;
368+
Py_hash_t hash = _PyObject_HashFast(key);
369+
if (hash == -1) {
370+
return -1;
375371
}
376372
return set_add_entry(so, key, hash);
377373
}
378374

379375
static int
380376
set_contains_key(PySetObject *so, PyObject *key)
381377
{
382-
Py_hash_t hash;
383-
384-
if (!PyUnicode_CheckExact(key) ||
385-
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
386-
hash = PyObject_Hash(key);
387-
if (hash == -1)
388-
return -1;
378+
Py_hash_t hash = _PyObject_HashFast(key);
379+
if (hash == -1) {
380+
return -1;
389381
}
390382
return set_contains_entry(so, key, hash);
391383
}
392384

393385
static int
394386
set_discard_key(PySetObject *so, PyObject *key)
395387
{
396-
Py_hash_t hash;
397-
398-
if (!PyUnicode_CheckExact(key) ||
399-
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
400-
hash = PyObject_Hash(key);
401-
if (hash == -1)
402-
return -1;
388+
Py_hash_t hash = _PyObject_HashFast(key);
389+
if (hash == -1) {
390+
return -1;
403391
}
404392
return set_discard_entry(so, key, hash);
405393
}

Objects/typeobject.c

+4-9
Original file line numberDiff line numberDiff line change
@@ -5251,15 +5251,10 @@ find_name_in_mro(PyTypeObject *type, PyObject *name, int *error)
52515251
{
52525252
ASSERT_TYPE_LOCK_HELD();
52535253

5254-
Py_hash_t hash;
5255-
if (!PyUnicode_CheckExact(name) ||
5256-
(hash = _PyASCIIObject_CAST(name)->hash) == -1)
5257-
{
5258-
hash = PyObject_Hash(name);
5259-
if (hash == -1) {
5260-
*error = -1;
5261-
return NULL;
5262-
}
5254+
Py_hash_t hash = _PyObject_HashFast(name);
5255+
if (hash == -1) {
5256+
*error = -1;
5257+
return NULL;
52635258
}
52645259

52655260
/* Look in tp_dict of types in MRO */

Tools/tsan/suppressions_free_threading.txt

-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ race_top:assign_version_tag
3030
race_top:insertdict
3131
race_top:lookup_tp_dict
3232
race_top:new_reference
33-
race_top:set_contains_key
3433
# https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35
3534
race_top:set_discard_entry
3635
race_top:_PyDict_CheckConsistency

0 commit comments

Comments
 (0)