std.debug: allow fp unwind from context

It's easy to do FP unwinding from a CPU context: you just report the
captured ip/pc value first, and then unwind from the captured fp value.
All this really needed was a couple of new functions on the
`std.debug.cpu_context` implementations so that we don't need to rely on
`std.debug.Dwarf` to access the captured registers.

Resolves: #25576
This commit is contained in:
Matthew Lugg 2025-11-08 11:12:40 +00:00
parent 49e19fc94f
commit 92bc619c49
3 changed files with 169 additions and 41 deletions

View file

@ -617,7 +617,7 @@ pub const StackUnwindOptions = struct {
pub noinline fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize) StackTrace {
const empty_trace: StackTrace = .{ .index = 0, .instruction_addresses = &.{} };
if (!std.options.allow_stack_tracing) return empty_trace;
var it = StackIterator.init(options.context) catch return empty_trace;
var it: StackIterator = .init(options.context);
defer it.deinit();
if (!it.stratOk(options.allow_unsafe_unwind)) return empty_trace;
var total_frames: usize = 0;
@ -671,14 +671,7 @@ pub noinline fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Wri
return;
},
};
var it = StackIterator.init(options.context) catch |err| switch (err) {
error.CannotUnwindFromContext => {
tty_config.setColor(writer, .dim) catch {};
try writer.print("Cannot print stack trace: context unwind unavailable for target\n", .{});
tty_config.setColor(writer, .reset) catch {};
return;
},
};
var it: StackIterator = .init(options.context);
defer it.deinit();
if (!it.stratOk(options.allow_unsafe_unwind)) {
tty_config.setColor(writer, .dim) catch {};
@ -821,22 +814,32 @@ pub fn dumpStackTrace(st: *const StackTrace) void {
}
const StackIterator = union(enum) {
/// We will first report the current PC of this `CpuContextPtr`, then we will switch to a
/// different strategy to actually unwind.
ctx_first: CpuContextPtr,
/// Unwinding using debug info (e.g. DWARF CFI).
di: if (SelfInfo != void and SelfInfo.can_unwind) SelfInfo.UnwindContext else noreturn,
/// We will first report the *current* PC of this `UnwindContext`, then we will switch to `di`.
di_first: if (SelfInfo != void and SelfInfo.can_unwind) SelfInfo.UnwindContext else noreturn,
di: if (SelfInfo != void and SelfInfo.can_unwind and fp_usability != .ideal)
SelfInfo.UnwindContext
else
noreturn,
/// Naive frame-pointer-based unwinding. Very simple, but typically unreliable.
fp: usize,
/// It is important that this function is marked `inline` so that it can safely use
/// `@frameAddress` and `cpu_context.Native.current` as the caller's stack frame and
/// our own are one and the same.
inline fn init(opt_context_ptr: ?CpuContextPtr) error{CannotUnwindFromContext}!StackIterator {
///
/// `opt_context_ptr` must remain valid while the `StackIterator` is used.
inline fn init(opt_context_ptr: ?CpuContextPtr) StackIterator {
if (opt_context_ptr) |context_ptr| {
if (SelfInfo == void or !SelfInfo.can_unwind) return error.CannotUnwindFromContext;
// Use `di_first` here so we report the PC in the context before unwinding any further.
return .{ .di_first = .init(context_ptr) };
// Use `ctx_first` here so we report the PC in the context before unwinding any further.
return .{ .ctx_first = context_ptr };
}
// Otherwise, we're going to capture the current context or frame address, so we don't need
// `ctx_first`, because the first PC is in `std.debug` and we need to unwind before reaching
// a frame we want to report.
// Work around the C backend being unable to use inline assembly on MSVC by disabling the
// call to `current`. This effectively constrains stack trace collection and dumping to FP
// unwinding when building with CBE for MSVC.
@ -846,8 +849,6 @@ const StackIterator = union(enum) {
cpu_context.Native != noreturn and
fp_usability != .ideal)
{
// We don't need `di_first` here, because our PC is in `std.debug`; we're only interested
// in our caller's frame and above.
return .{ .di = .init(&.current()) };
}
return .{
@ -866,8 +867,9 @@ const StackIterator = union(enum) {
}
fn deinit(si: *StackIterator) void {
switch (si.*) {
.ctx_first => {},
.fp => {},
.di, .di_first => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()),
.di => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()),
}
}
@ -931,7 +933,7 @@ const StackIterator = union(enum) {
/// Whether the current unwind strategy is allowed given `allow_unsafe`.
fn stratOk(it: *const StackIterator, allow_unsafe: bool) bool {
return switch (it.*) {
.di, .di_first => true,
.ctx_first, .di => true,
// If we omitted frame pointers from *this* compilation, FP unwinding would crash
// immediately regardless of anything. But FPs could also be omitted from a different
// linked object, so it's not guaranteed to be safe, unless the target specifically
@ -959,13 +961,16 @@ const StackIterator = union(enum) {
fn next(it: *StackIterator) Result {
switch (it.*) {
.di_first => |unwind_context| {
const first_pc = unwind_context.pc;
if (first_pc == 0) return .end;
it.* = .{ .di = unwind_context };
.ctx_first => |context_ptr| {
// After the first frame, start actually unwinding.
it.* = if (SelfInfo != void and SelfInfo.can_unwind and fp_usability != .ideal)
.{ .di = .init(context_ptr) }
else
.{ .fp = context_ptr.getFp() };
// The caller expects *return* addresses, where they will subtract 1 to find the address of the call.
// However, we have the actual current PC, which should not be adjusted. Compensate by adding 1.
return .{ .frame = first_pc +| 1 };
return .{ .frame = context_ptr.getPc() +| 1 };
},
.di => |*unwind_context| {
const di = getSelfDebugInfo() catch unreachable;

View file

@ -47,16 +47,9 @@ pub const CacheEntry = struct {
};
pub fn init(cpu_context: *const std.debug.cpu_context.Native) SelfUnwinder {
// `@constCast` is safe because we aren't going to store to the resulting pointer.
const raw_pc_ptr = regNative(@constCast(cpu_context), ip_reg_num) catch |err| switch (err) {
error.InvalidRegister => unreachable, // `ip_reg_num` is definitely valid
error.UnsupportedRegister => unreachable, // the implementation needs to support ip
error.IncompatibleRegisterSize => unreachable, // ip is definitely `usize`-sized
};
const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*);
return .{
.cpu_state = cpu_context.*,
.pc = pc,
.pc = stripInstructionPtrAuthCode(cpu_context.getPc()),
.cfi_vm = .{},
.expr_vm = .{},
};
@ -69,13 +62,7 @@ pub fn deinit(unwinder: *SelfUnwinder, gpa: Allocator) void {
}
pub fn getFp(unwinder: *const SelfUnwinder) usize {
// `@constCast` is safe because we aren't going to store to the resulting pointer.
const ptr = regNative(@constCast(&unwinder.cpu_state), fp_reg_num) catch |err| switch (err) {
error.InvalidRegister => unreachable, // `fp_reg_num` is definitely valid
error.UnsupportedRegister => unreachable, // the implementation needs to support fp
error.IncompatibleRegisterSize => unreachable, // fp is a pointer so is `usize`-sized
};
return ptr.*;
return unwinder.cpu_state.getFp();
}
/// Compute the rule set for the address `unwinder.pc` from the information in `unwind`. The caller
@ -332,7 +319,6 @@ fn applyOffset(base: usize, offset: i64) !usize {
}
const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?;
const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch);
const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch);
const std = @import("std");

View file

@ -250,6 +250,13 @@ const Aarch64 = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Aarch64) u64 {
return ctx.x[29];
}
pub fn getPc(ctx: *const Aarch64) u64 {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Aarch64, register_num: u16) DwarfRegisterError![]u8 {
// DWARF for the Arm(r) 64-bit Architecture (AArch64) § 4.1 "DWARF register names"
switch (register_num) {
@ -324,6 +331,13 @@ const Arc = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Arc) u32 {
return ctx.r[27];
}
pub fn getPc(ctx: *const Arc) u32 {
return ctx.pcl;
}
pub fn dwarfRegisterBytes(ctx: *Arc, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.r[register_num]),
@ -356,6 +370,13 @@ const Arm = struct {
return ctx;
}
pub fn getFp(ctx: *const Arm) u32 {
return ctx.r[11];
}
pub fn getPc(ctx: *const Arm) u32 {
return ctx.r[15];
}
pub fn dwarfRegisterBytes(ctx: *Arm, register_num: u16) DwarfRegisterError![]u8 {
// DWARF for the Arm(r) Architecture § 4.1 "DWARF register names"
switch (register_num) {
@ -415,6 +436,13 @@ const Csky = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Csky) u32 {
return ctx.r[14];
}
pub fn getPc(ctx: *const Csky) u32 {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Csky, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.r[register_num]),
@ -476,6 +504,13 @@ const Hexagon = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Hexagon) u32 {
return ctx.r[30];
}
pub fn getPc(ctx: *const Hexagon) u32 {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Hexagon, register_num: u16) DwarfRegisterError![]u8 {
// Sourced from LLVM's HexagonRegisterInfo.td, which disagrees with LLDB...
switch (register_num) {
@ -544,6 +579,13 @@ const Kvx = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Kvx) u64 {
return ctx.r[14];
}
pub fn getPc(ctx: *const Kvx) u64 {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Kvx, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...63 => return @ptrCast(&ctx.r[register_num]),
@ -604,6 +646,13 @@ const Lanai = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Lanai) u32 {
return ctx.r[5];
}
pub fn getPc(ctx: *const Lanai) u32 {
return ctx.r[2];
}
pub fn dwarfRegisterBytes(ctx: *Lanai, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.s[register_num]),
@ -701,6 +750,13 @@ const LoongArch = extern struct {
return ctx;
}
pub fn getFp(ctx: *const LoongArch) Gpr {
return ctx.r[22];
}
pub fn getPc(ctx: *const LoongArch) Gpr {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *LoongArch, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.r[register_num]),
@ -733,6 +789,13 @@ const M68k = extern struct {
return ctx;
}
pub fn getFp(ctx: *const M68k) u32 {
return ctx.a[6];
}
pub fn getPc(ctx: *const M68k) u32 {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *M68k, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...7 => return @ptrCast(&ctx.d[register_num]),
@ -845,6 +908,15 @@ const Mips = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Mips) usize {
// On N32, `Gpr` is 64 bits but `usize` is 32 bits.
return @intCast(ctx.r[30]);
}
pub fn getPc(ctx: *const Mips) usize {
// On N32, `Gpr` is 64 bits but `usize` is 32 bits.
return @intCast(ctx.pc);
}
pub fn dwarfRegisterBytes(ctx: *Mips, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.r[register_num]),
@ -917,6 +989,13 @@ const Or1k = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Or1k) u32 {
return ctx.r[2];
}
pub fn getPc(ctx: *const Or1k) u32 {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Or1k, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.r[register_num]),
@ -1022,6 +1101,13 @@ const Powerpc = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Powerpc) Gpr {
return ctx.r[1];
}
pub fn getPc(ctx: *const Powerpc) Gpr {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Powerpc, register_num: u16) DwarfRegisterError![]u8 {
// References:
//
@ -1168,6 +1254,13 @@ const Riscv = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Riscv) Gpr {
return ctx.x[8];
}
pub fn getPc(ctx: *const Riscv) Gpr {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Riscv, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...31 => return @ptrCast(&ctx.x[register_num]),
@ -1208,6 +1301,13 @@ const S390x = extern struct {
return ctx;
}
pub fn getFp(ctx: *const S390x) u64 {
return ctx.r[11];
}
pub fn getPc(ctx: *const S390x) u64 {
return ctx.psw.addr;
}
pub fn dwarfRegisterBytes(ctx: *S390x, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...15 => return @ptrCast(&ctx.r[register_num]),
@ -1310,6 +1410,13 @@ const Sparc = extern struct {
asm volatile ("ta 3" ::: .{ .memory = true }); // ST_FLUSH_WINDOWS
}
pub fn getFp(ctx: *const Sparc) Gpr {
return ctx.i[6];
}
pub fn getPc(ctx: *const Sparc) Gpr {
return ctx.pc;
}
pub fn dwarfRegisterBytes(ctx: *Sparc, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...7 => return @ptrCast(&ctx.g[register_num]),
@ -1404,6 +1511,13 @@ const Ve = extern struct {
return ctx;
}
pub fn getFp(ctx: *const Ve) u64 {
return ctx.s[9];
}
pub fn getPc(ctx: *const Ve) u64 {
return ctx.ic;
}
pub fn dwarfRegisterBytes(ctx: *Ve, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
0...63 => return @ptrCast(&ctx.s[register_num]),
@ -1444,6 +1558,13 @@ const X86_16 = struct {
return ctx;
}
pub fn getFp(ctx: *const X86_16) u16 {
return ctx.regs.get(.bp);
}
pub fn getPc(ctx: *const X86_16) u16 {
return ctx.regs.get(.ip);
}
// NOTE: There doesn't seem to be any standard for DWARF x86-16 so we'll just reuse the ones for x86.
pub fn dwarfRegisterBytes(ctx: *X86_16, register_num: u16) DwarfRegisterError![]u8 {
switch (register_num) {
@ -1490,6 +1611,13 @@ const X86 = struct {
return ctx;
}
pub fn getFp(ctx: *const X86) u32 {
return ctx.gprs.get(.ebp);
}
pub fn getPc(ctx: *const X86) u32 {
return ctx.gprs.get(.eip);
}
pub fn dwarfRegisterBytes(ctx: *X86, register_num: u16) DwarfRegisterError![]u8 {
// System V Application Binary Interface Intel386 Architecture Processor Supplement Version 1.1
// § 2.4.2 "DWARF Register Number Mapping"
@ -1558,6 +1686,15 @@ const X86_64 = struct {
return ctx;
}
pub fn getFp(ctx: *const X86_64) usize {
// On x32, registers are 64 bits but `usize` is 32 bits.
return @intCast(ctx.gprs.get(.rbp));
}
pub fn getPc(ctx: *const X86_64) usize {
// On x32, registers are 64 bits but `usize` is 32 bits.
return @intCast(ctx.gprs.get(.rip));
}
pub fn dwarfRegisterBytes(ctx: *X86_64, register_num: u16) DwarfRegisterError![]u8 {
// System V Application Binary Interface AMD64 Architecture Processor Supplement
// § 3.6.2 "DWARF Register Number Mapping"