diff --git a/lib/std/Io/Threaded.zig b/lib/std/Io/Threaded.zig index 03670a9eb7..f285a51d6c 100644 --- a/lib/std/Io/Threaded.zig +++ b/lib/std/Io/Threaded.zig @@ -201,7 +201,7 @@ const Closure = struct { const Start = *const fn (*Closure, *Threaded) void; fn requestCancel(closure: *Closure, t: *Threaded) void { - var signal_id = switch (@atomicRmw(CancelStatus, &closure.cancel_status, .Xchg, .requested, .monotonic).unpack()) { + const signal_id = switch (@atomicRmw(CancelStatus, &closure.cancel_status, .Xchg, .requested, .monotonic).unpack()) { .none, .acknowledged, .requested => return, .signal_id => |signal_id| signal_id, }; @@ -214,54 +214,32 @@ const Closure = struct { // The task will enter a blocking syscall before checking for cancellation again. // We can send a signal to interrupt the syscall, but if it arrives before - // the syscall instruction, it will be missed. Therefore, this code tries - // again until the cancellation request is acknowledged. - - // 1 << 10 ns is about 1 microsecond, approximately syscall overhead. - // 1 << 20 ns is about 1 millisecond. - // 1 << 30 ns is about 1 second. + // the syscall instruction, it will be missed. // - // On a heavily loaded Linux 6.17.5, I observed a maximum of 20 - // attempts not acknowledged before the timeout (including exponential - // backoff) was sufficient, despite the heavy load. - // - // The time wasted here sleeping is mitigated by the fact that, later - // on, the system will likely wait for the canceled task, causing it - // to indefinitely yield until the canceled task finishes, and the - // task must acknowledge the cancel before it proceeds to that point. 
- const max_attempts = 22; - - for (0..max_attempts) |attempt_index| { - if (std.Thread.use_pthreads) { - if (std.c.pthread_kill(signal_id, .IO) != 0) return; - } else if (native_os == .linux) { - const pid: posix.pid_t = p: { - const cached_pid = @atomicLoad(Pid, &t.pid, .monotonic); - if (cached_pid != .unknown) break :p @intFromEnum(cached_pid); - const pid = std.os.linux.getpid(); - @atomicStore(Pid, &t.pid, @enumFromInt(pid), .monotonic); - break :p pid; - }; - if (std.os.linux.tgkill(pid, @bitCast(signal_id), .IO) != 0) return; - } else { - return; - } - - var timespec: posix.timespec = .{ - .sec = 0, - .nsec = @as(isize, 1) << @intCast(attempt_index), + // Unfortunately, trying again until the cancellation request is + // acknowledged has been observed to incur a large amount of overhead, + // and usually strong cancellation guarantees are not needed, so the + // race condition is not handled here. Users who want to avoid this + // have this menu of options instead: + // * Use no libc, in which case Zig std lib can avoid the race (tracking + // issue: https://codeberg.org/ziglang/zig/issues/30049) + // * Use musl libc instead of glibc + // * Use `std.Io.Evented`. But this is not implemented yet. 
Tracked by + // - https://codeberg.org/ziglang/zig/issues/30050 + // - https://codeberg.org/ziglang/zig/issues/30051 + if (std.Thread.use_pthreads) { + if (std.c.pthread_kill(signal_id, .IO) != 0) return; + } else if (native_os == .linux) { + const pid: posix.pid_t = p: { + const cached_pid = @atomicLoad(Pid, &t.pid, .monotonic); + if (cached_pid != .unknown) break :p @intFromEnum(cached_pid); + const pid = std.os.linux.getpid(); + @atomicStore(Pid, &t.pid, @enumFromInt(pid), .monotonic); + break :p pid; }; - if (native_os == .linux) { - _ = std.os.linux.clock_nanosleep(posix.CLOCK.MONOTONIC, .{ .ABSTIME = false }, &timespec, &timespec); - } else { - _ = posix.system.nanosleep(&timespec, &timespec); - } - - switch (@atomicRmw(CancelStatus, &closure.cancel_status, .Xchg, .requested, .monotonic).unpack()) { - .requested => continue, // Retry needed in case other thread hasn't yet entered the syscall. - .none, .acknowledged => return, - .signal_id => |new_signal_id| signal_id = new_signal_id, - } + if (std.os.linux.tgkill(pid, @bitCast(signal_id), .IO) != 0) return; + } else { + return; } } }; @@ -303,7 +281,7 @@ pub fn init( .mask = posix.sigemptyset(), .flags = 0, }; - if (have_sig_io) posix.sigaction(.IO, &act, &t.old_sig_io); + if (!is_musl and have_sig_io) posix.sigaction(.IO, &act, &t.old_sig_io); if (have_sig_pipe) posix.sigaction(.PIPE, &act, &t.old_sig_pipe); t.have_signal_handler = true; } @@ -341,7 +319,7 @@ pub fn deinit(t: *Threaded) void { if (ws2_32.WSACleanup() != 0) recoverableOsBugDetected(); } if (posix.Sigaction != void and t.have_signal_handler) { - if (have_sig_io) posix.sigaction(.IO, &t.old_sig_io, null); + if (!is_musl and have_sig_io) posix.sigaction(.IO, &t.old_sig_io, null); if (have_sig_pipe) posix.sigaction(.PIPE, &t.old_sig_pipe, null); } t.* = undefined;