zig/lib/std/os/linux/x86_64.zig
mlugg a18fd41064
std: rework/remove ucontext_t
Our usage of `ucontext_t` in the standard library was kind of
problematic. We unnecessarily mimicked libc-specific structures, and our
`getcontext` implementation was overkill for our use case of stack
tracing.

This commit introduces a new namespace, `std.debug.cpu_context`, which
contains "context" types for various architectures (currently x86,
x86_64, ARM, and AARCH64) containing the general-purpose CPU registers;
the ones needed in practice for stack unwinding. Each implementation has
a function `current` which populates the structure using inline
assembly. The structure is user-overridable, though that should only be
necessary if the standard library does not have an implementation for
the *architecture*: that is to say, none of this is OS-dependent.

Of course, in POSIX signal handlers, we get a `ucontext_t` from the
kernel. The function `std.debug.cpu_context.fromPosixSignalContext`
converts this to a `std.debug.cpu_context.Native` with a big ol' target
switch.

This functionality is not exposed from `std.c` or `std.posix`, and
neither are `ucontext_t`, `mcontext_t`, or `getcontext`. The rationale
is that these types and functions do not conform to a specific ABI, and
in fact tend to get updated over time based on CPU features and
extensions; in addition, different libcs use different structures which
are "partially compatible" with the kernel structure. Overall, it's a
mess, but all we need is the kernel context, so we can just define a
kernel-compatible structure as long as we don't claim C compatibility by
putting it in `std.c` or `std.posix`.

This change resulted in a few nice `std.debug` simplifications, but
nothing too noteworthy. However, the main benefit of this change is that
DWARF unwinding---sometimes necessary for collecting stack traces
reliably---now requires far less target-specific integration.

Also fix a bug I noticed in `PageAllocator` (I found this due to a bug
in my distro's QEMU distribution; thanks, broken QEMU patch!) and I
think a couple of minor bugs in `std.debug`.

Resolves: #23801
Resolves: #23802
2025-09-30 13:44:54 +01:00

354 lines
8.6 KiB
Zig

const builtin = @import("builtin");
const std = @import("../../std.zig");
const maxInt = std.math.maxInt;
const linux = std.os.linux;
const SYS = linux.SYS;
const iovec = std.posix.iovec;
const iovec_const = std.posix.iovec_const;
const pid_t = linux.pid_t;
const uid_t = linux.uid_t;
const gid_t = linux.gid_t;
const clock_t = linux.clock_t;
const stack_t = linux.stack_t;
const sigset_t = linux.sigset_t;
const sockaddr = linux.sockaddr;
const socklen_t = linux.socklen_t;
const timespec = linux.timespec;
/// Executes the `syscall` instruction for `number` with no arguments.
///
/// The syscall number is placed in `rax`, and the value found in `rax`
/// afterwards is returned unmodified. `rcx`, `r11` and memory are declared
/// as clobbered.
pub fn syscall0(number: SYS) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Executes the `syscall` instruction for `number` with one argument.
///
/// Register assignment: number in `rax`, `arg1` in `rdi`. The value found in
/// `rax` afterwards is returned unmodified. `rcx`, `r11` and memory are
/// declared as clobbered.
pub fn syscall1(number: SYS, arg1: usize) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
          [a1] "{rdi}" (arg1),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Executes the `syscall` instruction for `number` with two arguments.
///
/// Register assignment: number in `rax`, `arg1` in `rdi`, `arg2` in `rsi`.
/// The value found in `rax` afterwards is returned unmodified. `rcx`, `r11`
/// and memory are declared as clobbered.
pub fn syscall2(number: SYS, arg1: usize, arg2: usize) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
          [a1] "{rdi}" (arg1),
          [a2] "{rsi}" (arg2),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Executes the `syscall` instruction for `number` with three arguments.
///
/// Register assignment: number in `rax`, then `arg1`..`arg3` in `rdi`,
/// `rsi`, `rdx`. The value found in `rax` afterwards is returned unmodified.
/// `rcx`, `r11` and memory are declared as clobbered.
pub fn syscall3(number: SYS, arg1: usize, arg2: usize, arg3: usize) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
          [a1] "{rdi}" (arg1),
          [a2] "{rsi}" (arg2),
          [a3] "{rdx}" (arg3),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Executes the `syscall` instruction for `number` with four arguments.
///
/// Register assignment: number in `rax`, then `arg1`..`arg4` in `rdi`,
/// `rsi`, `rdx`, `r10`. The value found in `rax` afterwards is returned
/// unmodified. `rcx`, `r11` and memory are declared as clobbered.
pub fn syscall4(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
          [a1] "{rdi}" (arg1),
          [a2] "{rsi}" (arg2),
          [a3] "{rdx}" (arg3),
          [a4] "{r10}" (arg4),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Executes the `syscall` instruction for `number` with five arguments.
///
/// Register assignment: number in `rax`, then `arg1`..`arg5` in `rdi`,
/// `rsi`, `rdx`, `r10`, `r8`. The value found in `rax` afterwards is
/// returned unmodified. `rcx`, `r11` and memory are declared as clobbered.
pub fn syscall5(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
          [a1] "{rdi}" (arg1),
          [a2] "{rsi}" (arg2),
          [a3] "{rdx}" (arg3),
          [a4] "{r10}" (arg4),
          [a5] "{r8}" (arg5),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Executes the `syscall` instruction for `number` with six arguments.
///
/// Register assignment: number in `rax`, then `arg1`..`arg6` in `rdi`,
/// `rsi`, `rdx`, `r10`, `r8`, `r9`. The value found in `rax` afterwards is
/// returned unmodified. `rcx`, `r11` and memory are declared as clobbered.
pub fn syscall6(
    number: SYS,
    arg1: usize,
    arg2: usize,
    arg3: usize,
    arg4: usize,
    arg5: usize,
    arg6: usize,
) usize {
    const nr = @intFromEnum(number);
    return asm volatile ("syscall"
        : [result] "={rax}" (-> usize),
        : [nr] "{rax}" (nr),
          [a1] "{rdi}" (arg1),
          [a2] "{rsi}" (arg2),
          [a3] "{rdx}" (arg3),
          [a4] "{r10}" (arg4),
          [a5] "{r8}" (arg5),
          [a6] "{r9}" (arg6),
        : .{ .rcx = true, .r11 = true, .memory = true });
}
/// Naked entry point that issues Linux syscall 56 (`SYS_clone`) and, on the
/// path where the syscall returns zero, calls a callback and then exits.
///
/// Because the function is `callconv(.naked)` it declares no parameters; the
/// assembly reads its inputs directly from registers and the stack:
/// - `rdi`: address that is later invoked through `callq *%r9`.
/// - `rsi`: stack pointer handed to the kernel after it has been aligned down
///   to 16 bytes and lowered by 8 to make room for the value of `rcx`.
/// - `rdx`, `r8`, `r9`, `8(%rsp)`: forwarded to the syscall in `rdi`, `rdx`,
///   `r8` and `r10` respectively.
/// - `rcx`: stored at the top of the prepared stack and popped into `rdi`
///   right before the callback is called.
///
/// NOTE(review): this presumably corresponds to a C-ABI call of the form
/// `clone(func, stack, flags, arg, ptid, tls, ctid)` -- confirm against the
/// declaration used by callers.
///
/// If the syscall leaves a non-zero value in `rax`, the function returns to
/// its caller with that value. If `rax` is zero (per clone(2), the newly
/// created task), execution continues at label `1:`.
pub fn clone() callconv(.naked) usize {
// Shuffle the incoming registers into syscall argument registers, prepare the
// new stack, run the syscall, and return unless the result is zero.
asm volatile (
\\ movl $56,%%eax // SYS_clone
\\ movq %%rdi,%%r11
\\ movq %%rdx,%%rdi
\\ movq %%r8,%%rdx
\\ movq %%r9,%%r8
\\ movq 8(%%rsp),%%r10
\\ movq %%r11,%%r9
\\ andq $-16,%%rsi
\\ subq $8,%%rsi
\\ movq %%rcx,(%%rsi)
\\ syscall
\\ testq %%rax,%%rax
\\ jz 1f
\\ retq
\\
\\1:
);
// Only emitted when unwind tables or debug info are being generated: mark the
// return address (`rip`) as unrecoverable from this point on.
if (builtin.unwind_tables != .none or !builtin.strip_debug_info) asm volatile (
\\ .cfi_undefined %%rip
);
// Zero-result path: clear the frame pointer, pop the value that was stored on
// the prepared stack into `rdi`, call the callback held in `r9`, then pass its
// 32-bit result in `edi` to syscall 60 (`SYS_exit`).
asm volatile (
\\ xorl %%ebp,%%ebp
\\
\\ popq %%rdi
\\ callq *%%r9
\\ movl %%eax,%%edi
\\ movl $60,%%eax // SYS_exit
\\ syscall
\\
);
}
/// On this target `restore` is simply another name for `restore_rt`.
pub const restore = restore_rt;
/// Naked stub that executes the `rt_sigreturn` syscall; declared `noreturn`.
///
/// NOTE(review): presumably installed as the restorer of signal actions --
/// confirm against the sigaction setup code that references it.
pub fn restore_rt() callconv(.naked) noreturn {
// The way the syscall number reaches `rax` depends on the compiler backend.
switch (@import("builtin").zig_backend) {
// C backend: the number is passed as an immediate ("i") operand and the
// template itself moves it into `eax` before the `syscall` instruction.
.stage2_c => asm volatile (
\\ movl %[number], %%eax
\\ syscall
:
: [number] "i" (@intFromEnum(SYS.rt_sigreturn)),
),
// Every other backend: the "{rax}" constraint loads the number, so the
// template consists of the `syscall` instruction alone.
else => asm volatile (
\\ syscall
:
: [number] "{rax}" (@intFromEnum(SYS.rt_sigreturn)),
),
}
}
// Integer widths of basic file-related types on this target: the unsigned
// ones are pointer-sized (`usize`), the signed ones are `isize`.
pub const mode_t = usize;
pub const time_t = isize;
pub const nlink_t = usize;
pub const blksize_t = isize;
pub const blkcnt_t = isize;
/// Namespace of untyped integer constants named after the C `F_*` macros.
///
/// NOTE(review): presumably the fcntl(2) command numbers followed by the
/// record-lock types used in `Flock.type` -- confirm against callers.
pub const F = struct {
// Command values.
pub const DUPFD = 0;
pub const GETFD = 1;
pub const SETFD = 2;
pub const GETFL = 3;
pub const SETFL = 4;
pub const GETLK = 5;
pub const SETLK = 6;
pub const SETLKW = 7;
pub const SETOWN = 8;
pub const GETOWN = 9;
pub const SETSIG = 10;
pub const GETSIG = 11;
// Note the gap: values 12 through 14 are not defined here.
pub const SETOWN_EX = 15;
pub const GETOWN_EX = 16;
pub const GETOWNER_UIDS = 17;
// Lock kinds; these deliberately restart at 0 and overlap the values above.
pub const RDLCK = 0;
pub const WRLCK = 1;
pub const UNLCK = 2;
};
/// Symbol names and symbol-version strings for functions looked up in the vDSO.
///
/// Each `*_SYM` constant is paired with the `*_VER` constant of the same prefix.
pub const VDSO = struct {
// `clock_gettime` entry point.
pub const CGT_SYM = "__vdso_clock_gettime";
pub const CGT_VER = "LINUX_2.6";
// `getcpu` entry point.
pub const GETCPU_SYM = "__vdso_getcpu";
pub const GETCPU_VER = "LINUX_2.6";
};
/// Codes for setting and reading the FS and GS bases.
///
/// NOTE(review): presumably the `ARCH_*` operation codes of arch_prctl(2) --
/// confirm against callers. Note that the SET codes are ordered GS, FS while
/// the GET codes are ordered FS, GS.
pub const ARCH = struct {
pub const SET_GS = 0x1001;
pub const SET_FS = 0x1002;
pub const GET_FS = 0x1003;
pub const GET_GS = 0x1004;
};
/// Indices into `gregset_t` (23 entries, so valid indices are 0 through 22).
///
/// The order is the same as the field order of `sigcontext`: one index per
/// 8-byte slot.
pub const REG = struct {
pub const R8 = 0;
pub const R9 = 1;
pub const R10 = 2;
pub const R11 = 3;
pub const R12 = 4;
pub const R13 = 5;
pub const R14 = 6;
pub const R15 = 7;
pub const RDI = 8;
pub const RSI = 9;
pub const RBP = 10;
pub const RBX = 11;
pub const RDX = 12;
pub const RAX = 13;
pub const RCX = 14;
pub const RSP = 15;
pub const RIP = 16;
pub const EFL = 17;
// One slot shared by the four 16-bit `cs`, `gs`, `fs`, `pad0` fields of
// `sigcontext`.
pub const CSGSFS = 18;
pub const ERR = 19;
pub const TRAPNO = 20;
pub const OLDMASK = 21;
pub const CR2 = 22;
};
/// C-layout record describing a byte-range lock.
///
/// NOTE(review): presumably the `struct flock` passed with the `F.GETLK`,
/// `F.SETLK` and `F.SETLKW` commands, with `type` holding one of `F.RDLCK`,
/// `F.WRLCK`, `F.UNLCK` -- confirm against callers.
pub const Flock = extern struct {
type: i16,
whence: i16,
// `start` and `len` use the 64-bit `off_t` declared further down in this file.
start: off_t,
len: off_t,
pid: pid_t,
};
// File offset, inode number and device number types; all are 64 bits wide,
// with only `off_t` being signed.
pub const off_t = i64;
pub const ino_t = u64;
pub const dev_t = u64;
/// The `stat` record in the layout used by the Linux kernel.
///
/// This is an `extern struct`: the order and width of the fields, including
/// the `__pad0` and `__unused` fillers, are part of the binary layout.
pub const Stat = extern struct {
    dev: dev_t,
    ino: ino_t,
    nlink: usize,
    mode: u32,
    uid: uid_t,
    gid: gid_t,
    __pad0: u32,
    rdev: dev_t,
    size: off_t,
    blksize: isize,
    blocks: i64,
    atim: timespec,
    mtim: timespec,
    ctim: timespec,
    __unused: [3]isize,

    /// Returns a copy of the `atim` timestamp.
    pub fn atime(self: Stat) timespec {
        return self.atim;
    }

    /// Returns a copy of the `mtim` timestamp.
    pub fn mtime(self: Stat) timespec {
        return self.mtim;
    }

    /// Returns a copy of the `ctim` timestamp.
    pub fn ctime(self: Stat) timespec {
        return self.ctim;
    }
};
/// C-layout time value made of two pointer-sized signed integers.
///
/// NOTE(review): presumably seconds and microseconds, as in C `struct timeval`.
pub const timeval = extern struct {
sec: isize,
usec: isize,
};
/// C-layout time zone record made of two 32-bit signed integers.
///
/// NOTE(review): presumably mirrors C `struct timezone` (minutes west of
/// Greenwich and a DST correction type) -- confirm against callers.
pub const timezone = extern struct {
minuteswest: i32,
dsttime: i32,
};
// 32-bit ELF symbol index type.
pub const Elf_Symndx = u32;
// A single saved general register slot: one machine word.
pub const greg_t = usize;
// The saved register file: 23 slots, indexed by the constants in `REG`.
pub const gregset_t = [23]greg_t;
/// C-layout floating-point/SIMD state block, 512 bytes in total
/// (32-byte header + 8 * 16 bytes `st` + 16 * 16 bytes `xmm` + 96 bytes padding).
///
/// NOTE(review): the field names and sizes presumably follow the FXSAVE memory
/// image -- confirm against the kernel's `struct _fpstate`.
pub const fpstate = extern struct {
// Header: four 16-bit words, two machine words, two 32-bit words.
cwd: u16,
swd: u16,
ftw: u16,
fop: u16,
rip: usize,
rdp: usize,
mxcsr: u32,
mxcr_mask: u32,
// Eight 16-byte slots: 64-bit significand, 16-bit exponent, 6 bytes of padding.
st: [8]extern struct {
significand: [4]u16,
exponent: u16,
padding: [3]u16 = undefined,
},
// Sixteen 128-bit registers, each stored as four 32-bit elements.
xmm: [16]extern struct {
element: [4]u32,
},
// Trailing filler; defaults to `undefined` so initializers may omit it.
padding: [24]u32 = undefined,
};
// Non-optional pointer to an `fpstate` block; used as the type of
// `mcontext_t.fpregs`.
pub const fpregset_t = *fpstate;
/// C-layout saved CPU context with one named field per register.
///
/// The fields up to and including `cr2` occupy 23 machine words and appear in
/// the same order as the indices in `REG`, so this struct has the same layout
/// as `mcontext_t` (whose first member is the 23-word `gregset_t`).
pub const sigcontext = extern struct {
r8: usize,
r9: usize,
r10: usize,
r11: usize,
r12: usize,
r13: usize,
r14: usize,
r15: usize,
rdi: usize,
rsi: usize,
rbp: usize,
rbx: usize,
rdx: usize,
rax: usize,
rcx: usize,
rsp: usize,
rip: usize,
eflags: usize,
// The next four 16-bit fields together fill a single 8-byte slot
// (index `REG.CSGSFS`).
cs: u16,
gs: u16,
fs: u16,
pad0: u16 = undefined,
err: usize,
trapno: usize,
oldmask: usize,
cr2: usize,
// Pointer to the separately stored floating-point state.
fpstate: *fpstate,
reserved1: [8]usize = undefined,
};
/// C-layout machine context: the general registers as an array, a pointer to
/// the floating-point state, and eight reserved machine words.
///
/// Layout-wise this matches `sigcontext`, which spells out the same 23 register
/// slots as individual fields.
pub const mcontext_t = extern struct {
// Indexed by the constants in `REG`.
gregs: gregset_t,
fpregs: fpregset_t,
reserved1: [8]usize = undefined,
};
/// `ucontext_t` is part of the state pushed on the stack by the kernel for
/// a signal handler. It is also a subset of the state returned from the
/// makecontext/getcontext/swapcontext POSIX APIs.
///
/// Currently this structure matches the glibc/musl layout: it contains a
/// 1024-bit signal mask and `fpregs_mem`. This structure should be
/// split into one for the kernel ABI, and c.zig should define a glibc/musl
/// compatible structure.
pub const ucontext_t = extern struct {
flags: usize,
// Optional pointer to another context; `null` when there is none.
link: ?*ucontext_t,
stack: stack_t,
mcontext: mcontext_t,
// 1024 bits expressed as an array of `c_ulong`; the element count is derived
// from the bit width of `c_ulong`.
sigmask: [1024 / @bitSizeOf(c_ulong)]c_ulong, // Currently a glibc-compatible (1024-bit) sigmask.
fpregs_mem: [64]usize, // Not part of kernel ABI, only part of glibc ucontext_t
};