zig/lib/std/debug/Dwarf/SelfUnwinder.zig
//! Implements stack unwinding based on `Dwarf.Unwind`. The caller is responsible for providing the
//! initialized `Dwarf.Unwind` from the `.debug_frame` (or equivalent) section; this type handles
//! computing and applying the CFI register rules to evolve a `std.debug.cpu_context.Native` through
//! stack frames, thereby performing the virtual unwind.
//!
//! Notably, this type is a valid implementation of `std.debug.SelfInfo.UnwindContext`.

/// The state of the CPU in the current stack frame.
cpu_state: std.debug.cpu_context.Native,
/// The value of the Program Counter in this frame. This is almost the same as the value of the IP
/// register in `cpu_state`, but may be off by one because the IP is typically a *return* address.
pc: usize,

/// Virtual machine used to run CFI programs when computing register rules for a frame.
cfi_vm: Dwarf.Unwind.VirtualMachine,
/// Stack machine used to evaluate DWARF expressions appearing in CFA and register rules.
expr_vm: Dwarf.expression.StackMachine(.{ .call_frame_context = true }),

pub const CacheEntry = struct {
const max_rules = 32;
pc: usize,
cie: *const Dwarf.Unwind.CommonInformationEntry,
cfa_rule: Dwarf.Unwind.VirtualMachine.CfaRule,
num_rules: u8,
rules_regs: [max_rules]u16,
rules: [max_rules]Dwarf.Unwind.VirtualMachine.RegisterRule,

pub fn find(entries: []const CacheEntry, pc: usize) ?*const CacheEntry {
assert(pc != 0);
const idx = std.hash.int(pc) % entries.len;
const entry = &entries[idx];
return if (entry.pc == pc) entry else null;
}

pub fn populate(entry: *const CacheEntry, entries: []CacheEntry) void {
const idx = std.hash.int(entry.pc) % entries.len;
entries[idx] = entry.*;
}

pub const empty: CacheEntry = .{
.pc = 0,
.cie = undefined,
.cfa_rule = undefined,
.num_rules = undefined,
.rules_regs = undefined,
.rules = undefined,
};
};
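
// A minimal sketch, not from the original file, of how `find` and `populate` behave as a
// direct-mapped cache keyed on `pc`. The pc values are arbitrary; `cie` and the rule
// fields stay undefined because `find` only consults the `pc` key.
test "CacheEntry direct-mapped lookup" {
    var entries: [16]CacheEntry = @splat(.empty);
    var entry: CacheEntry = .empty;
    entry.pc = 0x1000;
    entry.populate(&entries);
    // The populated pc is found again; a pc that was never inserted misses.
    try std.testing.expect(CacheEntry.find(&entries, 0x1000) != null);
    try std.testing.expect(CacheEntry.find(&entries, 0x2000) == null);
}
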
pub fn init(cpu_context: *const std.debug.cpu_context.Native) SelfUnwinder {
return .{
.cpu_state = cpu_context.*,
.pc = stripInstructionPtrAuthCode(cpu_context.getPc()),
.cfi_vm = .{},
.expr_vm = .{},
};
}

pub fn deinit(unwinder: *SelfUnwinder, gpa: Allocator) void {
unwinder.cfi_vm.deinit(gpa);
unwinder.expr_vm.deinit(gpa);
unwinder.* = undefined;
}

pub fn getFp(unwinder: *const SelfUnwinder) usize {
return unwinder.cpu_state.getFp();
}

/// Compute the rule set for the address `unwinder.pc` from the information in `unwind`. The caller
/// may store the returned rule set in a simple fixed-size cache keyed on the `pc` field to avoid
/// frequently recomputing register rules when unwinding many times.
///
/// To actually apply the computed rules, see `next`.
pub fn computeRules(
unwinder: *SelfUnwinder,
gpa: Allocator,
unwind: *const Dwarf.Unwind,
load_offset: usize,
explicit_fde_offset: ?usize,
) !CacheEntry {
assert(unwinder.pc != 0);
const pc_vaddr = unwinder.pc - load_offset;
const fde_offset = explicit_fde_offset orelse try unwind.lookupPc(
pc_vaddr,
@sizeOf(usize),
native_endian,
) orelse return error.MissingDebugInfo;
const cie, const fde = try unwind.getFde(fde_offset, native_endian);
// `lookupPc` can return false positives, so check if the FDE *actually* includes the pc
if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) {
return error.MissingDebugInfo;
}
unwinder.cfi_vm.reset();
const row = try unwinder.cfi_vm.runTo(gpa, pc_vaddr, cie, &fde, @sizeOf(usize), native_endian);
var entry: CacheEntry = .{
.pc = unwinder.pc,
.cie = cie,
.cfa_rule = row.cfa,
.num_rules = undefined,
.rules_regs = undefined,
.rules = undefined,
};
var i: usize = 0;
for (unwinder.cfi_vm.rowColumns(&row)) |col| {
if (i == CacheEntry.max_rules) return error.UnsupportedDebugInfo;
_ = unwinder.cpu_state.dwarfRegisterBytes(col.register) catch |err| switch (err) {
// Reading an unsupported register during unwinding will result in an error, so there is
// no point wasting a rule slot in the cache entry for it.
error.UnsupportedRegister => continue,
error.InvalidRegister => return error.InvalidDebugInfo,
};
entry.rules_regs[i] = col.register;
entry.rules[i] = col.rule;
i += 1;
}
entry.num_rules = @intCast(i);
return entry;
}
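
// A rough sketch, not part of the original file, of the caller loop this API anticipates:
// look up the rule set for `unwinder.pc` in a fixed-size cache, compute and cache it on a
// miss, then apply it with `next`. Here `ctx`, `unwind` (a `*const Dwarf.Unwind`),
// `load_offset`, `max_frames`, and `handleFrame` are hypothetical caller-provided values.
//
//     var unwinder: SelfUnwinder = .init(&ctx);
//     defer unwinder.deinit(gpa);
//     var cache: [64]CacheEntry = @splat(.empty);
//     for (0..max_frames) |_| {
//         const entry: CacheEntry = if (CacheEntry.find(&cache, unwinder.pc)) |hit|
//             hit.*
//         else compute: {
//             const computed = try unwinder.computeRules(gpa, unwind, load_offset, null);
//             computed.populate(&cache);
//             break :compute computed;
//         };
//         const ret_addr = try unwinder.next(gpa, &entry);
//         handleFrame(ret_addr);
//         if (unwinder.pc == 0) break; // the return address register was undefined
//     }
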
/// Applies the register rules given in `cache_entry` to the current state of `unwinder`. The caller
/// is responsible for ensuring that `cache_entry` contains the correct rule set for `unwinder.pc`.
///
/// `unwinder.cpu_state` and `unwinder.pc` are updated to refer to the next frame, and this frame's
/// return address is returned as a `usize`.
pub fn next(unwinder: *SelfUnwinder, gpa: Allocator, cache_entry: *const CacheEntry) std.debug.SelfInfoError!usize {
return unwinder.nextInner(gpa, cache_entry) catch |err| switch (err) {
error.OutOfMemory,
error.InvalidDebugInfo,
=> |e| return e,
error.UnsupportedRegister,
error.UnimplementedExpressionCall,
error.UnimplementedOpcode,
error.UnimplementedUserOpcode,
error.UnimplementedTypedComparison,
error.UnimplementedTypeConversion,
error.UnknownExpressionOpcode,
=> return error.UnsupportedDebugInfo,
error.ReadFailed,
error.EndOfStream,
error.Overflow,
error.IncompatibleRegisterSize,
error.InvalidRegister,
error.IncompleteExpressionContext,
error.InvalidCFAOpcode,
error.InvalidExpression,
error.InvalidFrameBase,
error.InvalidIntegralTypeSize,
error.InvalidSubExpression,
error.InvalidTypeLength,
error.TruncatedIntegralType,
error.DivisionByZero,
=> return error.InvalidDebugInfo,
};
}

fn nextInner(unwinder: *SelfUnwinder, gpa: Allocator, cache_entry: *const CacheEntry) !usize {
const format = cache_entry.cie.format;
const cfa = switch (cache_entry.cfa_rule) {
.none => return error.InvalidDebugInfo,
.reg_off => |ro| cfa: {
const ptr = try regNative(&unwinder.cpu_state, ro.register);
break :cfa try applyOffset(ptr.*, ro.offset);
},
.expression => |expr| cfa: {
// On most implemented architectures, the CFA is defined to be the previous frame's SP.
//
// On s390x, it's defined to be SP + 160 (ELF ABI s390x Supplement §1.6.3); however,
// what this actually means is that there will be a `def_cfa r15 + 160`, so nothing
// special for us to do.
const prev_cfa_val = (try regNative(&unwinder.cpu_state, sp_reg_num)).*;
unwinder.expr_vm.reset();
const value = try unwinder.expr_vm.run(expr, gpa, .{
.format = format,
.cpu_context = &unwinder.cpu_state,
}, prev_cfa_val) orelse return error.InvalidDebugInfo;
switch (value) {
.generic => |g| break :cfa g,
else => return error.InvalidDebugInfo,
}
},
};
// Create a copy of the CPU state, to which we will apply the new rules.
var new_cpu_state = unwinder.cpu_state;
// On all implemented architectures, the CFA is defined to be the previous frame's SP
(try regNative(&new_cpu_state, sp_reg_num)).* = cfa;
const return_address_register = cache_entry.cie.return_address_register;
var has_return_address = true;
const rules_len = cache_entry.num_rules;
for (cache_entry.rules_regs[0..rules_len], cache_entry.rules[0..rules_len]) |register, rule| {
const new_val: union(enum) {
same,
undefined,
val: usize,
bytes: []const u8,
} = switch (rule) {
.default => val: {
// The way things are supposed to work is that `.undefined` is the default rule
// unless an ABI says otherwise (e.g. aarch64, s390x).
//
// Unfortunately, at some point, a decision was made to have libgcc's unwinder
// assume `.same` as the default for all registers. Compilers then started depending
// on this, and the practice was carried forward to LLVM's libunwind and some of its
// backends.
break :val .same;
},
.undefined => .undefined,
.same_value => .same,
.offset => |offset| val: {
const ptr: *const usize = @ptrFromInt(try applyOffset(cfa, offset));
break :val .{ .val = ptr.* };
},
.val_offset => |offset| .{ .val = try applyOffset(cfa, offset) },
.register => |r| .{ .bytes = try unwinder.cpu_state.dwarfRegisterBytes(r) },
.expression => |expr| val: {
unwinder.expr_vm.reset();
const value = try unwinder.expr_vm.run(expr, gpa, .{
.format = format,
.cpu_context = &unwinder.cpu_state,
}, cfa) orelse return error.InvalidDebugInfo;
const ptr: *const usize = switch (value) {
.generic => |addr| @ptrFromInt(addr),
else => return error.InvalidDebugInfo,
};
break :val .{ .val = ptr.* };
},
.val_expression => |expr| val: {
unwinder.expr_vm.reset();
const value = try unwinder.expr_vm.run(expr, gpa, .{
.format = format,
.cpu_context = &unwinder.cpu_state,
}, cfa) orelse return error.InvalidDebugInfo;
switch (value) {
.generic => |val| break :val .{ .val = val },
else => return error.InvalidDebugInfo,
}
},
};
switch (new_val) {
.same => {},
.undefined => {
const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register));
@memset(dest, undefined);
// If the return address register is explicitly set to `.undefined`, it means that
// there are no more frames to unwind.
if (register == return_address_register) {
has_return_address = false;
}
},
.val => |val| {
const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register));
if (dest.len != @sizeOf(usize)) return error.InvalidDebugInfo;
const dest_ptr: *align(1) usize = @ptrCast(dest);
dest_ptr.* = val;
},
.bytes => |src| {
const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register));
if (dest.len != src.len) return error.InvalidDebugInfo;
@memcpy(dest, src);
},
}
}
const return_address = if (has_return_address)
stripInstructionPtrAuthCode((try regNative(&new_cpu_state, return_address_register)).*)
else
0;
(try regNative(&new_cpu_state, ip_reg_num)).* = return_address;
// The new CPU state is complete; flush changes.
unwinder.cpu_state = new_cpu_state;
// The caller will subtract 1 from the return address to get an address corresponding to the
// function call. However, if this is a signal frame, that's actually incorrect, because the
// "return address" we have is the instruction which triggered the signal (if the signal
// handler returned, the instruction would be re-run). Compensate for this by incrementing
// the address in that case.
const adjusted_ret_addr = if (cache_entry.cie.is_signal_frame) return_address +| 1 else return_address;
// We also want to do that same subtraction here to get the PC for the next frame's FDE.
// This is because if the callee was noreturn, then the function call might be the caller's
// last instruction, so `return_address` might actually point outside of it!
unwinder.pc = adjusted_ret_addr -| 1;
return adjusted_ret_addr;
}

pub fn regNative(ctx: *std.debug.cpu_context.Native, num: u16) error{
InvalidRegister,
UnsupportedRegister,
IncompatibleRegisterSize,
}!*align(1) usize {
const bytes = try ctx.dwarfRegisterBytes(num);
if (bytes.len != @sizeOf(usize)) return error.IncompatibleRegisterSize;
return @ptrCast(bytes);
}
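
// Illustrative, not from the original file: `regNative` views a pointer-sized DWARF
// register as a native `usize`, so callers read and write it directly through the
// returned pointer, e.g.:
//
//     const sp = (try regNative(&unwinder.cpu_state, sp_reg_num)).*;
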
/// Since register rules are applied (usually) during a panic,
/// checked addition / subtraction is used so that we can return
/// an error and fall back to FP-based unwinding.
fn applyOffset(base: usize, offset: i64) !usize {
return if (offset >= 0)
try std.math.add(usize, base, @as(usize, @intCast(offset)))
else
try std.math.sub(usize, base, @as(usize, @intCast(-offset)));
}
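
// Illustrative checks, not from the original file: the signed offset is applied with
// checked arithmetic, so corrupt CFI data surfaces as `error.Overflow` rather than a
// wrapped address.
test applyOffset {
    try std.testing.expectEqual(@as(usize, 0x1008), try applyOffset(0x1000, 8));
    try std.testing.expectEqual(@as(usize, 0xff8), try applyOffset(0x1000, -8));
    try std.testing.expectError(error.Overflow, applyOffset(4, -8));
}
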
const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?;
const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch);

const std = @import("std");
const Allocator = std.mem.Allocator;
const Dwarf = std.debug.Dwarf;
const assert = std.debug.assert;
const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode;
const builtin = @import("builtin");
const native_endian = builtin.target.cpu.arch.endian();
const SelfUnwinder = @This();