Merge pull request #23464 from rootbeer/futex-casts

Linux futex (v1 and v2) API fixes, tests and Ziggification
This commit is contained in:
Alex Rønne Petersen 2025-06-20 10:08:22 +02:00 committed by GitHub
commit 14ad8378a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 402 additions and 98 deletions

View file

@ -1539,10 +1539,10 @@ const LinuxThreadImpl = struct {
continue;
}
switch (linux.E.init(linux.futex_wait(
switch (linux.E.init(linux.futex_4arg(
&self.thread.child_tid.raw,
linux.FUTEX.WAIT,
tid,
.{ .cmd = .WAIT, .private = false },
@bitCast(tid),
null,
))) {
.SUCCESS => continue,

View file

@ -262,10 +262,10 @@ const LinuxImpl = struct {
ts.nsec = @as(@TypeOf(ts.nsec), @intCast(timeout_ns % std.time.ns_per_s));
}
const rc = linux.futex_wait(
@as(*const i32, @ptrCast(&ptr.raw)),
linux.FUTEX.PRIVATE_FLAG | linux.FUTEX.WAIT,
@as(i32, @bitCast(expect)),
const rc = linux.futex_4arg(
&ptr.raw,
.{ .cmd = .WAIT, .private = true },
expect,
if (timeout != null) &ts else null,
);
@ -284,10 +284,10 @@ const LinuxImpl = struct {
}
fn wake(ptr: *const atomic.Value(u32), max_waiters: u32) void {
const rc = linux.futex_wake(
@as(*const i32, @ptrCast(&ptr.raw)),
linux.FUTEX.PRIVATE_FLAG | linux.FUTEX.WAKE,
std.math.cast(i32, max_waiters) orelse std.math.maxInt(i32),
const rc = linux.futex_3arg(
&ptr.raw,
.{ .cmd = .WAKE, .private = true },
@min(max_waiters, std.math.maxInt(i32)),
);
switch (linux.E.init(rc)) {

View file

@ -673,23 +673,43 @@ pub fn fallocate(fd: i32, mode: i32, offset: i64, length: i64) usize {
}
}
pub fn futex_wait(uaddr: *const i32, futex_op: u32, val: i32, timeout: ?*const timespec) usize {
return syscall4(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val)), @intFromPtr(timeout));
// The 4th parameter to the v1 futex syscall can either be an optional
// pointer to a timespec, or a uint32, depending on which "op" is being
// performed.
pub const futex_param4 = extern union {
timeout: ?*const timespec,
/// On all platforms only the bottom 32-bits of `val2` are relevant.
/// This is pointer-sized (`usize`) to match the pointer in the union.
val2: usize,
};
/// The futex v1 syscall. See also the newer futex2_{wait,wake,requeue,waitv} syscalls.
///
/// The futex_op parameter is a sub-command and flags. The sub-command
/// defines which of the subsequent parameters are relevant.
pub fn futex(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, val2timeout: futex_param4, uaddr2: ?*const anyopaque, val3: u32) usize {
return syscall6(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val, @intFromPtr(val2timeout.timeout), @intFromPtr(uaddr2), val3);
}
pub fn futex_wake(uaddr: *const i32, futex_op: u32, val: i32) usize {
return syscall3(.futex, @intFromPtr(uaddr), futex_op, @as(u32, @bitCast(val)));
/// Three-argument variation of the v1 futex call. Only suitable for a
/// futex_op that ignores the remaining arguments (e.g., FUTEX_OP.WAKE).
pub fn futex_3arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32) usize {
return syscall3(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val);
}
/// Given an array of `futex_waitv`, wait on each uaddr.
/// Four-argument variation on the v1 futex call. Only suitable for
/// futex_op that ignores the remaining arguments (e.g., FUTEX_OP.WAIT).
pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout: ?*const timespec) usize {
return syscall4(.futex, @intFromPtr(uaddr), @as(u32, @bitCast(futex_op)), val, @intFromPtr(timeout));
}
/// Given an array of `futex2_waitone`, wait on each uaddr.
/// The thread wakes if a futex_wake() is performed at any uaddr.
/// The syscall returns immediately if any waiter has *uaddr != val.
/// timeout is an optional timeout value for the operation.
/// Each waiter has individual flags.
/// The `flags` argument for the syscall should be used solely for specifying
/// the timeout as realtime, if needed.
/// Flags for private futexes, sizes, etc. should be used on the
/// individual flags of each waiter.
/// The syscall returns immediately if any futex has *uaddr != val.
/// timeout is an optional, absolute timeout value for the operation.
/// The `flags` argument is for future use and currently should be `.{}`.
/// Flags for private futexes, sizes, etc. should be set on the
/// individual flags of each `futex2_waitone`.
///
/// Returns the array index of one of the woken futexes.
/// No further information is provided: any number of other futexes may also
@ -697,42 +717,43 @@ pub fn futex_wake(uaddr: *const i32, futex_op: u32, val: i32) usize {
/// the returned index may refer to any one of them.
/// (It is not necessarily the futex with the smallest index, nor the one
/// most recently woken, nor...)
///
/// Requires at least kernel v5.16.
pub fn futex2_waitv(
/// List of futexes to wait on.
waiters: [*]futex_waitv,
/// Length of `waiters`.
futexes: [*]const futex2_waitone,
/// Length of `futexes`. Max of FUTEX2_WAITONE_MAX.
nr_futexes: u32,
/// Flag for timeout (monotonic/realtime).
flags: u32,
/// Optional absolute timeout.
timeout: ?*const timespec,
flags: FUTEX2_FLAGS_WAITV,
/// Optional absolute timeout. Always 64-bit, even on 32-bit platforms.
timeout: ?*const kernel_timespec,
/// Clock to be used for the timeout, realtime or monotonic.
clockid: clockid_t,
) usize {
return syscall5(
.futex_waitv,
@intFromPtr(waiters),
@intFromPtr(futexes),
nr_futexes,
flags,
@as(u32, @bitCast(flags)),
@intFromPtr(timeout),
@bitCast(@as(isize, @intFromEnum(clockid))),
@intFromEnum(clockid),
);
}
/// Wait on a futex.
/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
/// futex2 familiy of calls.
/// Wait on a single futex.
/// Identical to the futex v1 `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
/// futex2 family of calls.
///
/// Requires at least kernel v6.7.
pub fn futex2_wait(
/// Address of the futex to wait on.
uaddr: *const anyopaque,
/// Value of `uaddr`.
val: usize,
/// Bitmask.
/// Bitmask to match against incoming wakeup masks. Must not be zero.
mask: usize,
/// `FUTEX2` flags.
flags: u32,
/// Optional absolute timeout.
timeout: ?*const timespec,
flags: FUTEX2_FLAGS,
/// Optional absolute timeout. Always 64-bit, even on 32-bit platforms.
timeout: ?*const kernel_timespec,
/// Clock to be used for the timeout, realtime or monotonic.
clockid: clockid_t,
) usize {
@ -741,52 +762,55 @@ pub fn futex2_wait(
@intFromPtr(uaddr),
val,
mask,
flags,
@as(u32, @bitCast(flags)),
@intFromPtr(timeout),
@bitCast(@as(isize, @intFromEnum(clockid))),
@intFromEnum(clockid),
);
}
/// Wake a number of futexes.
/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
/// Wake (subset of) waiters on given futex.
/// Identical to the traditional `FUTEX.FUTEX_WAKE_BITSET` op, except it is part of the
/// futex2 family of calls.
///
/// Requires at least kernel v6.7.
pub fn futex2_wake(
/// Address of the futex(es) to wake.
/// Futex to wake
uaddr: *const anyopaque,
/// Bitmask
/// Bitmask to match against waiters.
mask: usize,
/// Number of the futexes to wake.
nr: i32,
/// `FUTEX2` flags.
flags: u32,
/// Maximum number of waiters on the futex to wake.
nr_wake: i32,
flags: FUTEX2_FLAGS,
) usize {
return syscall4(
.futex_wake,
@intFromPtr(uaddr),
mask,
@bitCast(@as(isize, nr)),
flags,
@as(u32, @bitCast(nr_wake)),
@as(u32, @bitCast(flags)),
);
}
/// Requeue a waiter from one futex to another.
/// Wake and/or requeue waiter(s) from one futex to another.
/// Identical to `FUTEX.CMP_REQUEUE`, except it is part of the futex2 family of calls.
///
/// Requires at least kernel v6.7.
pub fn futex2_requeue(
/// Array describing the source and destination futex.
waiters: [*]futex_waitv,
/// Unused.
flags: u32,
/// Number of futexes to wake.
/// The source and destination futexes. Must be a 2-element array.
waiters: [*]const futex2_waitone,
/// Currently unused.
flags: FUTEX2_FLAGS_REQUEUE,
/// Maximum number of waiters to wake on the source futex.
nr_wake: i32,
/// Number of futexes to requeue.
/// Maximum number of waiters to transfer to the destination futex.
nr_requeue: i32,
) usize {
return syscall4(
.futex_requeue,
@intFromPtr(waiters),
flags,
@bitCast(@as(isize, nr_wake)),
@bitCast(@as(isize, nr_requeue)),
@as(u32, @bitCast(flags)),
@as(u32, @bitCast(nr_wake)),
@as(u32, @bitCast(nr_requeue)),
);
}
@ -3385,37 +3409,97 @@ pub const FALLOC = struct {
pub const FL_UNSHARE_RANGE = 0x40;
};
pub const FUTEX = struct {
pub const WAIT = 0;
pub const WAKE = 1;
pub const FD = 2;
pub const REQUEUE = 3;
pub const CMP_REQUEUE = 4;
pub const WAKE_OP = 5;
pub const LOCK_PI = 6;
pub const UNLOCK_PI = 7;
pub const TRYLOCK_PI = 8;
pub const WAIT_BITSET = 9;
pub const WAKE_BITSET = 10;
pub const WAIT_REQUEUE_PI = 11;
pub const CMP_REQUEUE_PI = 12;
pub const PRIVATE_FLAG = 128;
pub const CLOCK_REALTIME = 256;
/// Max numbers of elements in a `futex_waitv` array.
pub const WAITV_MAX = 128;
// Futex v1 API commands. See futex man page for each command's
// interpretation of the futex arguments.
pub const FUTEX_COMMAND = enum(u7) {
WAIT = 0,
WAKE = 1,
FD = 2,
REQUEUE = 3,
CMP_REQUEUE = 4,
WAKE_OP = 5,
LOCK_PI = 6,
UNLOCK_PI = 7,
TRYLOCK_PI = 8,
WAIT_BITSET = 9,
WAKE_BITSET = 10,
WAIT_REQUEUE_PI = 11,
CMP_REQUEUE_PI = 12,
};
pub const FUTEX2 = struct {
pub const SIZE_U8 = 0x00;
pub const SIZE_U16 = 0x01;
pub const SIZE_U32 = 0x02;
pub const SIZE_U64 = 0x03;
pub const NUMA = 0x04;
/// Futex v1 API command and flags for the `futex_op` parameter
pub const FUTEX_OP = packed struct(u32) {
cmd: FUTEX_COMMAND,
private: bool,
realtime: bool = false, // realtime clock vs. monotonic clock
_reserved: u23 = 0,
};
pub const PRIVATE = FUTEX.PRIVATE_FLAG;
/// Futex v1 FUTEX_WAKE_OP `val3` operation:
pub const FUTEX_WAKE_OP = packed struct(u32) {
cmd: FUTEX_WAKE_OP_CMD,
/// From C API `FUTEX_OP_ARG_SHIFT`: Use (1 << oparg) as operand
arg_shift: bool = false,
cmp: FUTEX_WAKE_OP_CMP,
oparg: u12,
cmdarg: u12,
};
/// Futex v1 cmd for FUTEX_WAKE_OP `val3` command.
pub const FUTEX_WAKE_OP_CMD = enum(u3) {
/// uaddr2 = oparg
SET = 0,
/// uaddr2 += oparg
ADD = 1,
/// uaddr2 |= oparg
OR = 2,
/// uaddr2 &= ~oparg
ANDN = 3,
/// uaddr2 ^= oparg
XOR = 4,
};
/// Futex v1 comparison op for FUTEX_WAKE_OP `val3` cmp
pub const FUTEX_WAKE_OP_CMP = enum(u4) {
EQ = 0,
NE = 1,
LT = 2,
LE = 3,
GT = 4,
GE = 5,
};
/// Max number of elements in a `futex2_waitone` array.
pub const FUTEX2_WAITONE_MAX = 128;
/// For futex v2 API, the size of the futex at the uaddr. v1 futex are
/// always implicitly U32. As of kernel v6.14, only U32 is implemented
/// for v2 futexes.
pub const FUTEX2_SIZE = enum(u2) {
U8 = 0,
U16 = 1,
U32 = 2,
U64 = 3,
};
/// As of kernel 6.14 there are no defined flags to futex2_waitv.
pub const FUTEX2_FLAGS_WAITV = packed struct(u32) {
_reserved: u32 = 0,
};
/// As of kernel 6.14 there are no defined flags to futex2_requeue.
pub const FUTEX2_FLAGS_REQUEUE = packed struct(u32) {
_reserved: u32 = 0,
};
/// Flags for the futex v2 APIs futex2_wait and futex2_wake, and for the
/// per-futex `flags` field of the futex2_waitone struct. Not used for the
/// syscall-level `flags` argument of futex2_waitv or futex2_requeue.
pub const FUTEX2_FLAGS = packed struct(u32) {
size: FUTEX2_SIZE,
numa: bool = false,
_reserved: u4 = 0,
private: bool,
_undefined: u24 = 0,
};
pub const PROT = struct {
@ -9281,17 +9365,17 @@ pub const PTRACE = struct {
pub const GET_SYSCALL_INFO = 0x420e;
};
/// A waiter for vectorized wait.
pub const futex_waitv = extern struct {
// Expected value at uaddr
/// For futex2_waitv and futex2_requeue. Arrays of `futex2_waitone` allow
/// waiting on multiple futexes in one call.
pub const futex2_waitone = extern struct {
/// Expected value at uaddr, should match size of futex.
val: u64,
/// User address to wait on.
/// User address to wait on. Top-bits must be 0 on 32-bit.
uaddr: u64,
/// Flags for this waiter.
flags: u32,
flags: FUTEX2_FLAGS,
/// Reserved member to preserve alignment.
/// Should be 0.
__reserved: u32,
__reserved: u32 = 0,
};
pub const cache_stat_range = extern struct {

View file

@ -207,6 +207,226 @@ test "sysinfo" {
try expect(info.mem_unit <= std.heap.page_size_max);
}
comptime {
    // The packed FUTEX_OP layout must reproduce the raw flag bits: `private`
    // alone encodes as 128 and `realtime` alone encodes as 256.
    const private_only: linux.FUTEX_OP = .{ .cmd = @enumFromInt(0), .private = true, .realtime = false };
    const realtime_only: linux.FUTEX_OP = .{ .cmd = @enumFromInt(0), .private = false, .realtime = true };
    std.debug.assert(@as(u32, @bitCast(private_only)) == 128);
    std.debug.assert(@as(u32, @bitCast(realtime_only)) == 256);

    // Both members of the futex_param4 union must share storage, so a value
    // written through `val2` reads back unchanged through `timeout`.
    const param: linux.futex_param4 = .{ .val2 = 0xaabbcc };
    std.debug.assert(@intFromPtr(param.timeout) == 0xaabbcc);
}
test "futex v1" {
    var lock = std.atomic.Value(u32).init(1);

    // Operations and arguments shared by several of the calls below.
    const op_wait: linux.FUTEX_OP = .{ .cmd = .WAIT, .private = true };
    const op_wake: linux.FUTEX_OP = .{ .cmd = .WAKE, .private = true };
    const op_cmp_requeue: linux.FUTEX_OP = .{ .cmd = .CMP_REQUEUE, .private = true };
    const op_wait_bitset: linux.FUTEX_OP = .{ .cmd = .WAIT_BITSET, .private = true };
    const op_wake_bitset: linux.FUTEX_OP = .{ .cmd = .WAKE_BITSET, .private = true };
    const no_timeout: linux.futex_param4 = .{ .timeout = null };
    const two_ns: linux.timespec = .{ .sec = 0, .nsec = 2 };

    // WAIT with an expected value (2) that differs from the lock value (1):
    // both the 6-argument and 4-argument entry points must report AGAIN.
    try expectEqual(.AGAIN, linux.E.init(linux.futex(&lock.raw, op_wait, 2, no_timeout, null, 0)));
    try expectEqual(.AGAIN, linux.E.init(linux.futex_4arg(&lock.raw, op_wait, 2, null)));

    // WAIT with a matching expected value and a tiny timeout: TIMEDOUT.
    try expectEqual(.TIMEDOUT, linux.E.init(linux.futex(&lock.raw, op_wait, 1, .{ .timeout = &two_ns }, null, 0)));
    try expectEqual(.TIMEDOUT, linux.E.init(linux.futex_4arg(&lock.raw, op_wait, 1, &two_ns)));

    // WAKE with nobody waiting: zero waiters woken.
    try expectEqual(0, linux.futex(&lock.raw, op_wake, 2, no_timeout, null, 0));
    try expectEqual(0, linux.futex_3arg(&lock.raw, op_wake, 2));

    // CMP_REQUEUE where val3 (99) does not match the lock value.
    try expectEqual(.AGAIN, linux.E.init(linux.futex(&lock.raw, op_cmp_requeue, 2, .{ .val2 = 0 }, null, 99)));

    // CMP_REQUEUE with matching val3; there are no waiters to move.
    {
        var target_lock = std.atomic.Value(u32).init(1);
        const status = linux.futex(
            &lock.raw,
            op_cmp_requeue,
            3, // waiters to wake
            .{ .val2 = std.math.maxInt(u31) }, // waiters to requeue
            &target_lock.raw,
            1, // val3: expected value at `lock`
        );
        try expectEqual(0, status);
    }

    // WAKE_OP: mainly checks that the val3 operation word can be constructed.
    {
        var second_lock = std.atomic.Value(u32).init(1);
        const val3_op: linux.FUTEX_WAKE_OP = .{
            .cmd = .ANDN,
            .arg_shift = true,
            .cmp = .LT,
            .oparg = 4,
            .cmdarg = 5,
        };
        const status = linux.futex(
            &lock.raw,
            .{ .cmd = .WAKE_OP, .private = true },
            2, // waiters to wake on `lock`
            .{ .val2 = 3 }, // waiters to wake on `second_lock`
            &second_lock.raw,
            @bitCast(val3_op),
        );
        try expectEqual(0, status);
    }

    // WAIT_BITSET: value mismatch returns early, value match times out.
    {
        const mismatch = linux.futex(&lock.raw, op_wait_bitset, 2, no_timeout, null, 0xfff);
        try expectEqual(.AGAIN, linux.E.init(mismatch));

        const expired = linux.futex(&lock.raw, op_wait_bitset, 1, .{ .timeout = &two_ns }, null, 0xfff);
        try expectEqual(.TIMEDOUT, linux.E.init(expired));
    }

    // WAKE_BITSET: a non-empty mask succeeds, an empty mask is rejected.
    {
        const with_mask = linux.futex(&lock.raw, op_wake_bitset, 2, no_timeout, null, 0xfff000);
        try expectEqual(0, with_mask);

        const empty_mask = linux.futex(&lock.raw, op_wake_bitset, 2, no_timeout, null, 0);
        try expectEqual(.INVAL, linux.E.init(empty_mask));
    }
}
comptime {
    // FUTEX2_FLAGS must encode `.size = .U32` as 2 and `private` alone as 128.
    const shared_u32: linux.FUTEX2_FLAGS = .{ .size = .U32, .private = false };
    const private_size0: linux.FUTEX2_FLAGS = .{ .size = @enumFromInt(0), .private = true };
    std.debug.assert(@as(u32, @bitCast(shared_u32)) == 2);
    std.debug.assert(@as(u32, @bitCast(private_size0)) == 128);
}
test "futex2_waitv" {
    const Lock = std.atomic.Value(u32);
    const locks = [_]Lock{ Lock.init(1), Lock.init(1), Lock.init(1) };

    // One entry per lock, each expecting the value the lock currently holds.
    var futexes: [locks.len]linux.futex2_waitone = undefined;
    for (&futexes, &locks) |*entry, *lock| {
        entry.* = .{
            .val = 1,
            .uaddr = @intFromPtr(&lock.raw),
            .flags = .{ .size = .U32, .private = true },
        };
    }

    // The timeout is absolute, so 2ns lies far in the past and the wait is
    // expected to time out.
    const timeout: linux.kernel_timespec = .{ .sec = 0, .nsec = 2 };
    const status = linux.E.init(linux.futex2_waitv(&futexes, futexes.len, .{}, &timeout, .MONOTONIC));

    // futex2_waitv was added in kernel v5.16; skip where it is missing.
    if (status == .NOSYS) return error.SkipZigTest;
    try expectEqual(.TIMEDOUT, status);
}
/// Probes for the futex v2 syscalls (kernel v6.7 and later) by issuing a
/// futex2_wake call. Returns `error.SkipZigTest` when the kernel answers
/// ENOSYS; any other result is treated as "supported".
fn futex2_skip_if_unsupported() !void {
    const probe: u32 = 0;
    const flags: linux.FUTEX2_FLAGS = .{ .size = .U32, .private = true };
    switch (linux.E.init(linux.futex2_wake(&probe, 0, 1, flags))) {
        .NOSYS => return error.SkipZigTest,
        else => {},
    }
}
test "futex2_wait" {
    var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
    var rc: usize = 0;
    const mask = 0x1;

    try futex2_skip_if_unsupported();

    // The API for 8,16,64 bit futexes is defined, but as of kernel v6.14
    // (at least) they're not implemented.
    if (false) {
        rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U8, .private = true }, null, .MONOTONIC);
        try expectEqual(.INVAL, linux.E.init(rc));

        rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U16, .private = true }, null, .MONOTONIC);
        try expectEqual(.INVAL, linux.E.init(rc));

        rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U64, .private = true }, null, .MONOTONIC);
        try expectEqual(.INVAL, linux.E.init(rc));
    }

    const flags = linux.FUTEX2_FLAGS{ .size = .U32, .private = true };

    // no-wait, lock state mismatch
    rc = linux.futex2_wait(&lock.raw, 2, mask, flags, null, .MONOTONIC);
    try expectEqual(.AGAIN, linux.E.init(rc));

    // hit timeout on wait
    rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .MONOTONIC);
    try expectEqual(.TIMEDOUT, linux.E.init(rc));

    // timeout is absolute
    {
        var curr: linux.timespec = undefined;
        rc = linux.clock_gettime(.MONOTONIC, &curr); // gettime() uses platform timespec
        try expectEqual(0, rc);

        // ... but futex2_wait always uses 64-bit timespec
        var timeout: linux.kernel_timespec = .{
            .sec = curr.sec,
            .nsec = curr.nsec,
        };
        // Advance by 2ns and carry into the seconds field: a timespec whose
        // nsec is >= 1_000_000_000 is invalid and would be rejected with
        // EINVAL instead of producing the expected TIMEDOUT.
        timeout.nsec += 2;
        if (timeout.nsec >= std.time.ns_per_s) {
            timeout.nsec -= std.time.ns_per_s;
            timeout.sec += 1;
        }

        rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &timeout, .MONOTONIC);
        try expectEqual(.TIMEDOUT, linux.E.init(rc));
    }

    rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .REALTIME);
    try expectEqual(.TIMEDOUT, linux.E.init(rc));
}
test "futex2_wake" {
    var lock = std.atomic.Value(u32).init(1);

    try futex2_skip_if_unsupported();

    // This test starts no waiters on `lock`, so the wake call is expected to
    // return 0.
    const flags: linux.FUTEX2_FLAGS = .{ .size = .U32, .private = true };
    const woken = linux.futex2_wake(&lock.raw, 0xFF, 1, flags);
    try expectEqual(0, woken);
}
test "futex2_requeue" {
    try futex2_skip_if_unsupported();

    const Lock = std.atomic.Value(u32);
    const locks = [_]Lock{ Lock.init(1), Lock.init(1) };

    // Entry 0 describes the source futex, entry 1 the destination; both
    // expect the value the locks were initialised with.
    var futexes: [locks.len]linux.futex2_waitone = undefined;
    for (&futexes, &locks) |*entry, *lock| {
        entry.* = .{
            .val = 1,
            .uaddr = @intFromPtr(&lock.raw),
            .flags = .{ .size = .U32, .private = true },
        };
    }

    // This test starts no waiters, so the call is expected to return 0.
    const moved = linux.futex2_requeue(&futexes, .{}, 2, 2);
    try expectEqual(0, moved);
}
// Reference `linux.IoUring` from an anonymous test block; presumably this is
// what pulls the tests declared inside IoUring into this test run — TODO confirm.
test {
_ = linux.IoUring;
}