Skip to content

Commit 06fd745

Browse files
miss-islingtonaisk
andauthored
[3.13] gh-117657: Fix data races reported by TSAN in some set methods (GH-120914) (#121240)
Refactor the fast Unicode hash check into `_PyObject_HashFast` and use relaxed atomic loads in the free-threaded build. After this change, the TSAN doesn't report data races for this method. (cherry picked from commit 294e724) Co-authored-by: AN Long <aisk@users.noreply.github.com>
1 parent fc0b1cb commit 06fd745

File tree

6 files changed

+81
-121
lines changed

6 files changed

+81
-121
lines changed

Diff for: Include/internal/pycore_object.h

+14
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,20 @@ _PyObject_IS_GC(PyObject *obj)
628628
&& (type->tp_is_gc == NULL || type->tp_is_gc(obj)));
629629
}
630630

631+
// Fast inlined version of PyObject_Hash()
632+
static inline Py_hash_t
633+
_PyObject_HashFast(PyObject *op)
634+
{
635+
if (PyUnicode_CheckExact(op)) {
636+
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(
637+
_PyASCIIObject_CAST(op)->hash);
638+
if (hash != -1) {
639+
return hash;
640+
}
641+
}
642+
return PyObject_Hash(op);
643+
}
644+
631645
// Fast inlined version of PyType_IS_GC()
632646
#define _PyType_IS_GC(t) _PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC)
633647

Diff for: Modules/_collectionsmodule.c

+3-6
Original file line numberDiff line numberDiff line change
@@ -2537,12 +2537,9 @@ _collections__count_elements_impl(PyObject *module, PyObject *mapping,
25372537
if (key == NULL)
25382538
break;
25392539

2540-
if (!PyUnicode_CheckExact(key) ||
2541-
(hash = _PyASCIIObject_CAST(key)->hash) == -1)
2542-
{
2543-
hash = PyObject_Hash(key);
2544-
if (hash == -1)
2545-
goto done;
2540+
hash = _PyObject_HashFast(key);
2541+
if (hash == -1) {
2542+
goto done;
25462543
}
25472544

25482545
oldval = _PyDict_GetItem_KnownHash(mapping, key, hash);

Diff for: Objects/dictobject.c

+51-84
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ static inline Py_hash_t
427427
unicode_get_hash(PyObject *o)
428428
{
429429
assert(PyUnicode_CheckExact(o));
430-
return _PyASCIIObject_CAST(o)->hash;
430+
return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(o)->hash);
431431
}
432432

433433
/* Print summary info about the state of the optimized allocator */
@@ -2170,13 +2170,10 @@ dict_getitem(PyObject *op, PyObject *key, const char *warnmsg)
21702170
}
21712171
PyDictObject *mp = (PyDictObject *)op;
21722172

2173-
Py_hash_t hash;
2174-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2175-
hash = PyObject_Hash(key);
2176-
if (hash == -1) {
2177-
PyErr_FormatUnraisable(warnmsg);
2178-
return NULL;
2179-
}
2173+
Py_hash_t hash = _PyObject_HashFast(key);
2174+
if (hash == -1) {
2175+
PyErr_FormatUnraisable(warnmsg);
2176+
return NULL;
21802177
}
21812178

21822179
PyThreadState *tstate = _PyThreadState_GET();
@@ -2225,12 +2222,9 @@ _PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
22252222
assert(PyDict_CheckExact((PyObject*)mp));
22262223
assert(PyUnicode_CheckExact(key));
22272224

2228-
Py_hash_t hash = unicode_get_hash(key);
2225+
Py_hash_t hash = _PyObject_HashFast(key);
22292226
if (hash == -1) {
2230-
hash = PyObject_Hash(key);
2231-
if (hash == -1) {
2232-
return -1;
2233-
}
2227+
return -1;
22342228
}
22352229

22362230
return _Py_dict_lookup(mp, key, hash, &value);
@@ -2301,14 +2295,10 @@ PyDict_GetItemRef(PyObject *op, PyObject *key, PyObject **result)
23012295
return -1;
23022296
}
23032297

2304-
Py_hash_t hash;
2305-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1)
2306-
{
2307-
hash = PyObject_Hash(key);
2308-
if (hash == -1) {
2309-
*result = NULL;
2310-
return -1;
2311-
}
2298+
Py_hash_t hash = _PyObject_HashFast(key);
2299+
if (hash == -1) {
2300+
*result = NULL;
2301+
return -1;
23122302
}
23132303

23142304
return _PyDict_GetItemRef_KnownHash((PyDictObject *)op, key, hash, result);
@@ -2320,13 +2310,10 @@ _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject **
23202310
ASSERT_DICT_LOCKED(op);
23212311
assert(PyUnicode_CheckExact(key));
23222312

2323-
Py_hash_t hash;
2324-
if ((hash = unicode_get_hash(key)) == -1) {
2325-
hash = PyObject_Hash(key);
2326-
if (hash == -1) {
2327-
*result = NULL;
2328-
return -1;
2329-
}
2313+
Py_hash_t hash = _PyObject_HashFast(key);
2314+
if (hash == -1) {
2315+
*result = NULL;
2316+
return -1;
23302317
}
23312318

23322319
PyObject *value;
@@ -2360,12 +2347,9 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key)
23602347
PyErr_BadInternalCall();
23612348
return NULL;
23622349
}
2363-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1)
2364-
{
2365-
hash = PyObject_Hash(key);
2366-
if (hash == -1) {
2367-
return NULL;
2368-
}
2350+
hash = _PyObject_HashFast(key);
2351+
if (hash == -1) {
2352+
return NULL;
23692353
}
23702354

23712355
#ifdef Py_GIL_DISABLED
@@ -2433,10 +2417,9 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
24332417
Py_hash_t hash;
24342418
PyObject *value;
24352419

2436-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2437-
hash = PyObject_Hash(key);
2438-
if (hash == -1)
2439-
return NULL;
2420+
hash = _PyObject_HashFast(key);
2421+
if (hash == -1) {
2422+
return NULL;
24402423
}
24412424

24422425
/* namespace 1: globals */
@@ -2461,14 +2444,11 @@ setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value)
24612444
assert(key);
24622445
assert(value);
24632446
assert(PyDict_Check(mp));
2464-
Py_hash_t hash;
2465-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2466-
hash = PyObject_Hash(key);
2467-
if (hash == -1) {
2468-
Py_DECREF(key);
2469-
Py_DECREF(value);
2470-
return -1;
2471-
}
2447+
Py_hash_t hash = _PyObject_HashFast(key);
2448+
if (hash == -1) {
2449+
Py_DECREF(key);
2450+
Py_DECREF(value);
2451+
return -1;
24722452
}
24732453

24742454
PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -2617,12 +2597,10 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
26172597
int
26182598
PyDict_DelItem(PyObject *op, PyObject *key)
26192599
{
2620-
Py_hash_t hash;
26212600
assert(key);
2622-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2623-
hash = PyObject_Hash(key);
2624-
if (hash == -1)
2625-
return -1;
2601+
Py_hash_t hash = _PyObject_HashFast(key);
2602+
if (hash == -1) {
2603+
return -1;
26262604
}
26272605

26282606
return _PyDict_DelItem_KnownHash(op, key, hash);
@@ -2946,15 +2924,12 @@ pop_lock_held(PyObject *op, PyObject *key, PyObject **result)
29462924
return 0;
29472925
}
29482926

2949-
Py_hash_t hash;
2950-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
2951-
hash = PyObject_Hash(key);
2952-
if (hash == -1) {
2953-
if (result) {
2954-
*result = NULL;
2955-
}
2956-
return -1;
2927+
Py_hash_t hash = _PyObject_HashFast(key);
2928+
if (hash == -1) {
2929+
if (result) {
2930+
*result = NULL;
29572931
}
2932+
return -1;
29582933
}
29592934
return _PyDict_Pop_KnownHash(dict, key, hash, result);
29602935
}
@@ -3285,10 +3260,9 @@ dict_subscript(PyObject *self, PyObject *key)
32853260
Py_hash_t hash;
32863261
PyObject *value;
32873262

3288-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
3289-
hash = PyObject_Hash(key);
3290-
if (hash == -1)
3291-
return NULL;
3263+
hash = _PyObject_HashFast(key);
3264+
if (hash == -1) {
3265+
return NULL;
32923266
}
32933267
ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value);
32943268
if (ix == DKIX_ERROR)
@@ -4172,10 +4146,9 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
41724146
Py_hash_t hash;
41734147
Py_ssize_t ix;
41744148

4175-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4176-
hash = PyObject_Hash(key);
4177-
if (hash == -1)
4178-
return NULL;
4149+
hash = _PyObject_HashFast(key);
4150+
if (hash == -1) {
4151+
return NULL;
41794152
}
41804153
ix = _Py_dict_lookup_threadsafe(self, key, hash, &val);
41814154
if (ix == DKIX_ERROR)
@@ -4205,14 +4178,12 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
42054178
return -1;
42064179
}
42074180

4208-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4209-
hash = PyObject_Hash(key);
4210-
if (hash == -1) {
4211-
if (result) {
4212-
*result = NULL;
4213-
}
4214-
return -1;
4181+
hash = _PyObject_HashFast(key);
4182+
if (hash == -1) {
4183+
if (result) {
4184+
*result = NULL;
42154185
}
4186+
return -1;
42164187
}
42174188

42184189
if (mp->ma_keys == Py_EMPTY_KEYS) {
@@ -4642,12 +4613,10 @@ static PyMethodDef mapp_methods[] = {
46424613
int
46434614
PyDict_Contains(PyObject *op, PyObject *key)
46444615
{
4645-
Py_hash_t hash;
4616+
Py_hash_t hash = _PyObject_HashFast(key);
46464617

4647-
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
4648-
hash = PyObject_Hash(key);
4649-
if (hash == -1)
4650-
return -1;
4618+
if (hash == -1) {
4619+
return -1;
46514620
}
46524621

46534622
return _PyDict_Contains_KnownHash(op, key, hash);
@@ -6727,11 +6696,9 @@ int
67276696
_PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value)
67286697
{
67296698
if (value == NULL) {
6730-
Py_hash_t hash;
6731-
if (!PyUnicode_CheckExact(name) || (hash = unicode_get_hash(name)) == -1) {
6732-
hash = PyObject_Hash(name);
6733-
if (hash == -1)
6734-
return -1;
6699+
Py_hash_t hash = _PyObject_HashFast(name);
6700+
if (hash == -1) {
6701+
return -1;
67356702
}
67366703
return delitem_knownhash_lock_held((PyObject *)dict, name, hash);
67376704
} else {

Diff for: Objects/setobject.c

+9-21
Original file line numberDiff line numberDiff line change
@@ -365,41 +365,29 @@ set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
365365
static int
366366
set_add_key(PySetObject *so, PyObject *key)
367367
{
368-
Py_hash_t hash;
369-
370-
if (!PyUnicode_CheckExact(key) ||
371-
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
372-
hash = PyObject_Hash(key);
373-
if (hash == -1)
374-
return -1;
368+
Py_hash_t hash = _PyObject_HashFast(key);
369+
if (hash == -1) {
370+
return -1;
375371
}
376372
return set_add_entry(so, key, hash);
377373
}
378374

379375
static int
380376
set_contains_key(PySetObject *so, PyObject *key)
381377
{
382-
Py_hash_t hash;
383-
384-
if (!PyUnicode_CheckExact(key) ||
385-
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
386-
hash = PyObject_Hash(key);
387-
if (hash == -1)
388-
return -1;
378+
Py_hash_t hash = _PyObject_HashFast(key);
379+
if (hash == -1) {
380+
return -1;
389381
}
390382
return set_contains_entry(so, key, hash);
391383
}
392384

393385
static int
394386
set_discard_key(PySetObject *so, PyObject *key)
395387
{
396-
Py_hash_t hash;
397-
398-
if (!PyUnicode_CheckExact(key) ||
399-
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
400-
hash = PyObject_Hash(key);
401-
if (hash == -1)
402-
return -1;
388+
Py_hash_t hash = _PyObject_HashFast(key);
389+
if (hash == -1) {
390+
return -1;
403391
}
404392
return set_discard_entry(so, key, hash);
405393
}

Diff for: Objects/typeobject.c

+4-9
Original file line numberDiff line numberDiff line change
@@ -5069,15 +5069,10 @@ find_name_in_mro(PyTypeObject *type, PyObject *name, int *error)
50695069
{
50705070
ASSERT_TYPE_LOCK_HELD();
50715071

5072-
Py_hash_t hash;
5073-
if (!PyUnicode_CheckExact(name) ||
5074-
(hash = _PyASCIIObject_CAST(name)->hash) == -1)
5075-
{
5076-
hash = PyObject_Hash(name);
5077-
if (hash == -1) {
5078-
*error = -1;
5079-
return NULL;
5080-
}
5072+
Py_hash_t hash = _PyObject_HashFast(name);
5073+
if (hash == -1) {
5074+
*error = -1;
5075+
return NULL;
50815076
}
50825077

50835078
/* Look in tp_dict of types in MRO */

Diff for: Tools/tsan/suppressions_free_threading.txt

-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ race_top:assign_version_tag
3030
race_top:insertdict
3131
race_top:lookup_tp_dict
3232
race_top:new_reference
33-
race_top:set_contains_key
3433
# https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35
3534
race_top:set_discard_entry
3635
race_top:_PyDict_CheckConsistency

0 commit comments

Comments
 (0)