Skip to content

Commit c128718

Browse files
[3.13] gh-121368: Fix seq lock memory ordering in _PyType_Lookup (GH-121388) (#121505)
The `_PySeqLock_EndRead` function needs an acquire fence to ensure that the load of the sequence happens after any loads within the read side critical section. The missing fence can trigger bugs on macOS arm64. Additionally, we need a release fence in `_PySeqLock_LockWrite` to ensure that the sequence update is visible before any modifications to the cache entry. (cherry picked from commit 1d3cf79) Co-authored-by: Sam Gross <colesbury@gmail.com>
1 parent eef5c64 commit c128718

File tree

9 files changed

+50
-16
lines changed

9 files changed

+50
-16
lines changed

Include/cpython/pyatomic.h

+3
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,9 @@ _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj);
510510
// See https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence
511511
static inline void _Py_atomic_fence_seq_cst(void);
512512

513+
// Acquire fence
514+
static inline void _Py_atomic_fence_acquire(void);
515+
513516
// Release fence
514517
static inline void _Py_atomic_fence_release(void);
515518

Include/cpython/pyatomic_gcc.h

+4
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,10 @@ static inline void
542542
_Py_atomic_fence_seq_cst(void)
543543
{ __atomic_thread_fence(__ATOMIC_SEQ_CST); }
544544

545+
static inline void
546+
_Py_atomic_fence_acquire(void)
547+
{ __atomic_thread_fence(__ATOMIC_ACQUIRE); }
548+
545549
static inline void
546550
_Py_atomic_fence_release(void)
547551
{ __atomic_thread_fence(__ATOMIC_RELEASE); }

Include/cpython/pyatomic_msc.h

+12
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,18 @@ _Py_atomic_fence_seq_cst(void)
10661066
#else
10671067
# error "no implementation of _Py_atomic_fence_seq_cst"
10681068
#endif
1069+
}
1070+
1071+
static inline void
1072+
_Py_atomic_fence_acquire(void)
1073+
{
1074+
#if defined(_M_ARM64)
1075+
__dmb(_ARM64_BARRIER_ISHLD);
1076+
#elif defined(_M_X64) || defined(_M_IX86)
1077+
_ReadBarrier();
1078+
#else
1079+
# error "no implementation of _Py_atomic_fence_acquire"
1080+
#endif
10691081
}
10701082

10711083
static inline void

Include/cpython/pyatomic_std.h

+7
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,13 @@ _Py_atomic_fence_seq_cst(void)
961961
atomic_thread_fence(memory_order_seq_cst);
962962
}
963963

964+
static inline void
965+
_Py_atomic_fence_acquire(void)
966+
{
967+
_Py_USING_STD;
968+
atomic_thread_fence(memory_order_acquire);
969+
}
970+
964971
static inline void
965972
_Py_atomic_fence_release(void)
966973
{

Include/internal/pycore_lock.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,12 @@ PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock);
228228
PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock);
229229

230230
// End the read operation and confirm that the sequence number has not changed.
231-
// Returns 1 if the read was successful or 0 if the read should be re-tried.
232-
PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
231+
// Returns 1 if the read was successful or 0 if the read should be retried.
232+
PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
233233

234234
// Check if the lock was held during a fork and clear the lock. Returns 1
235-
// if the lock was held and any associated datat should be cleared.
236-
PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock);
235+
// if the lock was held and any associated data should be cleared.
236+
PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock);
237237

238238
#ifdef __cplusplus
239239
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix race condition in ``_PyType_Lookup`` in the free-threaded build due to
2+
a missing memory fence. This could lead to ``_PyType_Lookup`` returning
3+
incorrect results on arm64.

Modules/_testcapi/pyatomic.c

+1
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ test_atomic_fences(PyObject *self, PyObject *obj) {
125125
// Just make sure that the fences compile. We are not
126126
// testing any synchronizing ordering.
127127
_Py_atomic_fence_seq_cst();
128+
_Py_atomic_fence_acquire();
128129
_Py_atomic_fence_release();
129130
Py_RETURN_NONE;
130131
}

Objects/typeobject.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -5205,7 +5205,7 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
52055205
#ifdef Py_GIL_DISABLED
52065206
// synchronize-with other writing threads by doing an acquire load on the sequence
52075207
while (1) {
5208-
int sequence = _PySeqLock_BeginRead(&entry->sequence);
5208+
uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
52095209
uint32_t entry_version = _Py_atomic_load_uint32_relaxed(&entry->version);
52105210
uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag);
52115211
if (entry_version == type_version &&

Python/lock.c

+15-11
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ void _PySeqLock_LockWrite(_PySeqLock *seqlock)
514514
}
515515
else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
516516
// We've locked the cache
517+
_Py_atomic_fence_release();
517518
break;
518519
}
519520
else {
@@ -547,28 +548,31 @@ uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
547548
return sequence;
548549
}
549550

550-
uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
551+
int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
551552
{
552-
// Synchronize again and validate that the entry hasn't been updated
553-
// while we were readying the values.
554-
if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) {
553+
// gh-121368: We need an explicit acquire fence here to ensure that
554+
// this load of the sequence number is not reordered before any loads
555+
// within the read lock.
556+
_Py_atomic_fence_acquire();
557+
558+
if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
555559
return 1;
556-
}
560+
}
557561

558-
_Py_yield();
559-
return 0;
562+
_Py_yield();
563+
return 0;
560564
}
561565

562-
uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock)
566+
int _PySeqLock_AfterFork(_PySeqLock *seqlock)
563567
{
564568
// Synchronize again and validate that the entry hasn't been updated
565569
// while we were readying the values.
566-
if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
570+
if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
567571
seqlock->sequence = 0;
568572
return 1;
569-
}
573+
}
570574

571-
return 0;
575+
return 0;
572576
}
573577

574578
#undef PyMutex_Lock

0 commit comments

Comments
 (0)