From ef7ded0b34a166fffbe5a35956c357a53f781b62 Mon Sep 17 00:00:00 2001 From: Jeremy Day Date: Fri, 24 Jan 2025 13:59:54 -0800 Subject: [PATCH 1/6] Switch Win32 pipes to PIPE_WAIT --- src/event/event_windows.c | 3 +++ src/io.c | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/event/event_windows.c b/src/event/event_windows.c index 94674a3bf..31613ca00 100644 --- a/src/event/event_windows.c +++ b/src/event/event_windows.c @@ -277,6 +277,9 @@ _dispatch_pipe_monitor_thread(void *context) char cBuffer[1]; DWORD dwNumberOfBytesTransferred; OVERLAPPED ov = {0}; + // Block on a 0-byte read; this will only resume when data is + // available in the pipe. The pipe must be PIPE_WAIT or this thread + // will spin. BOOL bSuccess = ReadFile(hPipe, cBuffer, /* nNumberOfBytesToRead */ 0, &dwNumberOfBytesTransferred, &ov); DWORD dwBytesAvailable; diff --git a/src/io.c b/src/io.c index 165f82ac6..3f1f79a97 100644 --- a/src/io.c +++ b/src/io.c @@ -1463,20 +1463,20 @@ _dispatch_fd_entry_create_with_fd(dispatch_fd_t fd, uintptr_t hash) int result = ioctlsocket((SOCKET)fd, (long)FIONBIO, &value); (void)dispatch_assume_zero(result); } else { - // Try to make writing nonblocking, although pipes not coming - // from Foundation.Pipe may not have FILE_WRITE_ATTRIBUTES. + // The _dispatch_pipe_monitor_thread expects pipes to be + // PIPE_WAIT and exploits this assumption by using a blocking + // 0-byte read as a synchronization mechanism. DWORD dwPipeMode = 0; if (GetNamedPipeHandleState((HANDLE)fd, &dwPipeMode, NULL, - NULL, NULL, NULL, 0) && !(dwPipeMode & PIPE_NOWAIT)) { - dwPipeMode |= PIPE_NOWAIT; + NULL, NULL, NULL, 0) && !(dwPipeMode & PIPE_WAIT)) { + dwPipeMode |= PIPE_WAIT; if (!SetNamedPipeHandleState((HANDLE)fd, &dwPipeMode, NULL, NULL)) { - // We may end up blocking on subsequent writes, but we - // don't have a good alternative. - // The WriteQuotaAvailable from NtQueryInformationFile - // erroneously returns 0 when there is a blocking read - // on the other end of the pipe. - _dispatch_fd_entry_debug("failed to set PIPE_NOWAIT", + // If setting the pipe to PIPE_WAIT fails, the + // monitoring thread will spin constantly, saturating + // a core, which is undesirable but non-fatal. + // The semantics will still be correct in this case. + _dispatch_fd_entry_debug("failed to set PIPE_WAIT", fd_entry); } } From 207a8162caa1a2ea0c22f0f2012affa25468fa3a Mon Sep 17 00:00:00 2001 From: Jeremy Day Date: Fri, 24 Jan 2025 14:21:30 -0800 Subject: [PATCH 2/6] Clamp write to bufsize-1 bytes to afford sentinel value for no-progress --- src/io.c | 39 +++++++++++++++++++++++++++++++++------ tests/dispatch_io_pipe.c | 7 ++++++- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/src/io.c b/src/io.c index 3f1f79a97..8290aec05 100644 --- a/src/io.c +++ b/src/io.c @@ -2548,13 +2548,40 @@ _dispatch_operation_perform(dispatch_operation_t op) NTSTATUS status = _dispatch_NtQueryInformationFile(hFile, &iosb, &fpli, sizeof(fpli), FilePipeLocalInformation); if (NT_SUCCESS(status)) { - // WriteQuotaAvailable is unreliable in the presence - // of a blocking reader, when it can return zero, so only - // account for it otherwise - if (fpli.WriteQuotaAvailable > 0) { - len = MIN(len, fpli.WriteQuotaAvailable); + // WriteQuotaAvailable is the free space in the output buffer + // that has not already been reserved for reading. In other words, + // WriteQuotaAvailable = + // OutboundQuota - WriteQuotaUsed - QueuedReadSize. + // It is not documented that QueuedReadSize is part of this + // calculation, but this behavior has been observed experimentally. + // Unfortunately, this means that it is not possible to distinguish + // between a full output buffer and a reader blocked waiting for a + // full buffer's worth of data. This is a problem because if the + // output buffer is full and no reader is waiting for data, then + // attempting to write to the buffer of a PIPE_WAIT, non- + // overlapped I/O pipe will block the dispatch queue thread. + // + // In order to work around this idiosyncrasy, we bound the size of + // the write to be OutboundQuota - 1. This affords us a sentinel value + // in WriteQuotaAvailable that can be used to detect if a reader is + // making progress or not. + // WriteQuotaAvailable = 0 => a reader is blocked waiting for data. + // WriteQuotaAvailable = 1 => the pipe has been written to, but no + // reader is making progress. + // When we detect that WriteQuotaAvailable == 1, we write 0 bytes to + // avoid blocking the dispatch queue thread. + if (fpli.WriteQuotaAvailable == 0) { + // This condition can only occur when we have a reader blocked + // waiting for data on the pipe. In this case, write a full + // buffer's worth of data (less one byte to preserve this + // sentinel value of WriteQuotaAvailable == 0). + len = MIN(len, fpli.OutboundQuota - 1); + } else { + // Subtract 1 from WriteQuotaAvailable to ensure we do not fill + // the pipe and preserve the sentinel value of + // WriteQuotaAvailable == 1. + len = MIN(len, fpli.WriteQuotaAvailable - 1); } - len = MIN(len, fpli.OutboundQuota); } OVERLAPPED ovlOverlapped = {}; diff --git a/tests/dispatch_io_pipe.c b/tests/dispatch_io_pipe.c index f94438483..50ddc8a87 100644 --- a/tests/dispatch_io_pipe.c +++ b/tests/dispatch_io_pipe.c @@ -404,7 +404,12 @@ test_dispatch_write(int kind, int delay) dispatch_group_t g = dispatch_group_create(); dispatch_group_enter(g); - const size_t bufsize = test_get_pipe_buffer_size(kind); + // The libdispatch implementation writes at most bufsize-1 bytes + // before requiring a reader to start making progress. Because + // these tests operate serially, the reader will not make progress + // until the write finishes, and a write of >= bufsize will not + // finish until the reader starts draining the pipe. + const size_t bufsize = test_get_pipe_buffer_size(kind) - 1; char *buf = calloc(bufsize, 1); assert(buf); From 9e1b3bbb30c34fcacb0dea3b10ae9f1a8da71086 Mon Sep 17 00:00:00 2001 From: Jeremy Day Date: Fri, 8 Aug 2025 11:30:10 -0700 Subject: [PATCH 3/6] Revert "IO: switch Win32 pipes to `PIPE_WAIT` and use a sentinel byte" --- src/event/event_windows.c | 3 -- src/io.c | 59 +++++++++++---------------------------- tests/dispatch_io_pipe.c | 7 +---- 3 files changed, 17 insertions(+), 52 deletions(-) diff --git a/src/event/event_windows.c b/src/event/event_windows.c index 31613ca00..94674a3bf 100644 --- a/src/event/event_windows.c +++ b/src/event/event_windows.c @@ -277,9 +277,6 @@ _dispatch_pipe_monitor_thread(void *context) char cBuffer[1]; DWORD dwNumberOfBytesTransferred; OVERLAPPED ov = {0}; - // Block on a 0-byte read; this will only resume when data is - // available in the pipe. The pipe must be PIPE_WAIT or this thread - // will spin. BOOL bSuccess = ReadFile(hPipe, cBuffer, /* nNumberOfBytesToRead */ 0, &dwNumberOfBytesTransferred, &ov); DWORD dwBytesAvailable; diff --git a/src/io.c b/src/io.c index 8290aec05..165f82ac6 100644 --- a/src/io.c +++ b/src/io.c @@ -1463,20 +1463,20 @@ _dispatch_fd_entry_create_with_fd(dispatch_fd_t fd, uintptr_t hash) int result = ioctlsocket((SOCKET)fd, (long)FIONBIO, &value); (void)dispatch_assume_zero(result); } else { - // The _dispatch_pipe_monitor_thread expects pipes to be - // PIPE_WAIT and exploits this assumption by using a blocking - // 0-byte read as a synchronization mechanism. + // Try to make writing nonblocking, although pipes not coming + // from Foundation.Pipe may not have FILE_WRITE_ATTRIBUTES. DWORD dwPipeMode = 0; if (GetNamedPipeHandleState((HANDLE)fd, &dwPipeMode, NULL, - NULL, NULL, NULL, 0) && !(dwPipeMode & PIPE_WAIT)) { - dwPipeMode |= PIPE_WAIT; + NULL, NULL, NULL, 0) && !(dwPipeMode & PIPE_NOWAIT)) { + dwPipeMode |= PIPE_NOWAIT; if (!SetNamedPipeHandleState((HANDLE)fd, &dwPipeMode, NULL, NULL)) { - // If setting the pipe to PIPE_WAIT fails, the - // monitoring thread will spin constantly, saturating - // a core, which is undesirable but non-fatal. - // The semantics will still be correct in this case. - _dispatch_fd_entry_debug("failed to set PIPE_WAIT", + // We may end up blocking on subsequent writes, but we + // don't have a good alternative. + // The WriteQuotaAvailable from NtQueryInformationFile + // erroneously returns 0 when there is a blocking read + // on the other end of the pipe. + _dispatch_fd_entry_debug("failed to set PIPE_NOWAIT", fd_entry); } } @@ -2548,40 +2548,13 @@ _dispatch_operation_perform(dispatch_operation_t op) NTSTATUS status = _dispatch_NtQueryInformationFile(hFile, &iosb, &fpli, sizeof(fpli), FilePipeLocalInformation); if (NT_SUCCESS(status)) { - // WriteQuotaAvailable is the free space in the output buffer - // that has not already been reserved for reading. In other words, - // WriteQuotaAvailable = - // OutboundQuota - WriteQuotaUsed - QueuedReadSize. - // It is not documented that QueuedReadSize is part of this - // calculation, but this behavior has been observed experimentally. - // Unfortunately, this means that it is not possible to distinguish - // between a full output buffer and a reader blocked waiting for a - // full buffer's worth of data. This is a problem because if the - // output buffer is full and no reader is waiting for data, then - // attempting to write to the buffer of a PIPE_WAIT, non- - // overlapped I/O pipe will block the dispatch queue thread. - // - // In order to work around this idiosyncrasy, we bound the size of - // the write to be OutboundQuota - 1. This affords us a sentinel value - // in WriteQuotaAvailable that can be used to detect if a reader is - // making progress or not. - // WriteQuotaAvailable = 0 => a reader is blocked waiting for data. - // WriteQuotaAvailable = 1 => the pipe has been written to, but no - // reader is making progress. - // When we detect that WriteQuotaAvailable == 1, we write 0 bytes to - // avoid blocking the dispatch queue thread. - if (fpli.WriteQuotaAvailable == 0) { - // This condition can only occur when we have a reader blocked - // waiting for data on the pipe. In this case, write a full - // buffer's worth of data (less one byte to preserve this - // sentinel value of WriteQuotaAvailable == 0). - len = MIN(len, fpli.OutboundQuota - 1); - } else { - // Subtract 1 from WriteQuotaAvailable to ensure we do not fill - // the pipe and preserve the sentinel value of - // WriteQuotaAvailable == 1. - len = MIN(len, fpli.WriteQuotaAvailable - 1); + // WriteQuotaAvailable is unreliable in the presence + // of a blocking reader, when it can return zero, so only + // account for it otherwise + if (fpli.WriteQuotaAvailable > 0) { + len = MIN(len, fpli.WriteQuotaAvailable); } + len = MIN(len, fpli.OutboundQuota); } OVERLAPPED ovlOverlapped = {}; diff --git a/tests/dispatch_io_pipe.c b/tests/dispatch_io_pipe.c index 50ddc8a87..f94438483 100644 --- a/tests/dispatch_io_pipe.c +++ b/tests/dispatch_io_pipe.c @@ -404,12 +404,7 @@ test_dispatch_write(int kind, int delay) dispatch_group_t g = dispatch_group_create(); dispatch_group_enter(g); - // The libdispatch implementation writes at most bufsize-1 bytes - // before requiring a reader to start making progress. Because - // these tests operate serially, the reader will not make progress - // until the write finishes, and a write of >= bufsize will not - // finish until the reader starts draining the pipe. - const size_t bufsize = test_get_pipe_buffer_size(kind) - 1; + const size_t bufsize = test_get_pipe_buffer_size(kind); char *buf = calloc(bufsize, 1); assert(buf); From 83d98f63415cdfd8024288a699f58f444c3f27b3 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Tue, 15 Jul 2025 10:41:35 -0700 Subject: [PATCH 4/6] event: ensure acquire release semantics for muxnote disposal While it is sufficient to use the relaxed ordering for the acquire semantics in the `_dispatch_muxnote_retain`, we need to use the acquire release semantics on the release in `_dispatch_muxnote_release` to ensure that any pending retains are not interrupted. This should hopefully alleviate the occasional crashes that have been observed with the muxnote reference counting. Fixes: #887, #844 --- src/event/event_windows.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/event/event_windows.c b/src/event/event_windows.c index 94674a3bf..af67dfd4c 100644 --- a/src/event/event_windows.c +++ b/src/event/event_windows.c @@ -260,8 +260,16 @@ _dispatch_muxnote_retain(dispatch_muxnote_t dmn) static void _dispatch_muxnote_release(dispatch_muxnote_t dmn) { - uintptr_t refcount = os_atomic_dec(&dmn->dmn_refcount, relaxed); + // We perform a minor optimization here - perform the decrement with + // release semantics. In the case that we are going to dispose of the + // value, we perform the acquire fence. This reduces the cost on the + // normal path by avoiding the acquire fence. This should be more + // beneficial on ARM64, as X64 being TSO'ed doesn't gain much. However, + // `mfence` being isolated should hopefully be a bit more efficient than + // the repeated `lock` if there is contention. + uintptr_t refcount = os_atomic_dec(&dmn->dmn_refcount, release); if (refcount == 0) { + os_atomic_thread_fence(acquire); _dispatch_muxnote_dispose(dmn); } else if (refcount == UINTPTR_MAX) { DISPATCH_INTERNAL_CRASH(0, "muxnote refcount underflow"); From 7461878a64d45b6a36777a183ce5a479b232548b Mon Sep 17 00:00:00 2001 From: Finagolfin Date: Mon, 11 Aug 2025 22:20:06 +0530 Subject: [PATCH 5/6] build: Use new 16 KB page alignment on 64-bit Android --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3211ba2f1..626368d01 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -274,6 +274,7 @@ check_symbol_exists(__printflike "bsd/sys/cdefs.h" HAVE_PRINTFLIKE) if(CMAKE_SYSTEM_NAME STREQUAL Android) set(ENABLE_DTRACE_DEFAULT OFF) + add_link_options("LINKER:-z,max-page-size=16384") endif() if(CMAKE_SYSTEM_NAME STREQUAL FreeBSD) From 03a8ad13b82c40e748113239bf7f4e3eb06ed492 Mon Sep 17 00:00:00 2001 From: Alastair Houghton Date: Mon, 29 Sep 2025 15:29:10 +0100 Subject: [PATCH 6/6] [Build] Enable frame pointers. Frame pointers should be enabled everywhere. rdar://160759746 (cherry picked from commit a8b689a278a045b8ea6bd1f2ddfd3c52369db89d) --- CMakeLists.txt | 2 ++ cmake/modules/EnableFramePointers.cmake | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 cmake/modules/EnableFramePointers.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 626368d01..f9d08a34c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,6 +126,8 @@ include(DispatchCompilerWarnings) include(DTrace) include(SwiftSupport) +include(EnableFramePointers) + # NOTE(abdulras) this is the CMake supported way to control whether we generate # shared or static libraries. This impacts the behaviour of `add_library` in # what type of library it generates. diff --git a/cmake/modules/EnableFramePointers.cmake b/cmake/modules/EnableFramePointers.cmake new file mode 100644 index 000000000..8573d8dc4 --- /dev/null +++ b/cmake/modules/EnableFramePointers.cmake @@ -0,0 +1,13 @@ +# +# Including this file enables frame pointers, if we know how. +# + +include(CheckCompilerFlag) + +# Check if the compiler supports -fno-omit-frame-pointer +check_compiler_flag(C -fno-omit-frame-pointer SUPPORTS_NO_OMIT_FP) + +# If it does, use it +if (SUPPORTS_NO_OMIT_FP) + add_compile_options($<$:-fno-omit-frame-pointer>) +endif()