From 0caca625ebad92495a758e3121c91ba1f32774dd Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 12:01:49 +0000 Subject: [PATCH 1/8] std.debug: split up Mach-O debug info handling Like ELF, we now have `std.debug.MachOFile` for the host-independent parts, and `std.debug.SelfInfo.MachO` for logic requiring the file to correspond to the running program. --- lib/std/Build/Step/CheckObject.zig | 29 +- lib/std/debug.zig | 1 + lib/std/debug/MachOFile.zig | 501 +++++++++++++++++++++++++++++ lib/std/debug/SelfInfo/MachO.zig | 444 +++---------------------- lib/std/macho.zig | 68 ++-- src/link/MachO.zig | 4 +- src/link/MachO/Dylib.zig | 7 +- src/link/MachO/Object.zig | 14 +- 8 files changed, 606 insertions(+), 462 deletions(-) create mode 100644 lib/std/debug/MachOFile.zig diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index d9935d4f3d..ab10d368b2 100644 --- a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -729,10 +729,10 @@ const MachODumper = struct { imports: std.ArrayListUnmanaged([]const u8) = .empty, fn parse(ctx: *ObjectContext) !void { - var it = ctx.getLoadCommandIterator(); + var it = try ctx.getLoadCommandIterator(); var i: usize = 0; - while (it.next()) |cmd| { - switch (cmd.cmd()) { + while (try it.next()) |cmd| { + switch (cmd.hdr.cmd) { .SEGMENT_64 => { const seg = cmd.cast(macho.segment_command_64).?; try ctx.segments.append(ctx.gpa, seg); @@ -771,14 +771,13 @@ const MachODumper = struct { return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0); } - fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator { - const data = ctx.data[@sizeOf(macho.mach_header_64)..][0..ctx.header.sizeofcmds]; - return .{ .ncmds = ctx.header.ncmds, .buffer = data }; + fn getLoadCommandIterator(ctx: ObjectContext) !macho.LoadCommandIterator { + return .init(&ctx.header, ctx.data[@sizeOf(macho.mach_header_64)..]); } - fn getLoadCommand(ctx: ObjectContext, 
cmd: macho.LC) ?macho.LoadCommandIterator.LoadCommand { - var it = ctx.getLoadCommandIterator(); - while (it.next()) |lc| if (lc.cmd() == cmd) { + fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) !?macho.LoadCommandIterator.LoadCommand { + var it = try ctx.getLoadCommandIterator(); + while (try it.next()) |lc| if (lc.hdr.cmd == cmd) { return lc; }; return null; @@ -872,9 +871,9 @@ const MachODumper = struct { \\LC {d} \\cmd {s} \\cmdsize {d} - , .{ index, @tagName(lc.cmd()), lc.cmdsize() }); + , .{ index, @tagName(lc.hdr.cmd), lc.hdr.cmdsize }); - switch (lc.cmd()) { + switch (lc.hdr.cmd) { .SEGMENT_64 => { const seg = lc.cast(macho.segment_command_64).?; try writer.writeByte('\n'); @@ -1592,9 +1591,9 @@ const MachODumper = struct { .headers => { try ObjectContext.dumpHeader(ctx.header, writer); - var it = ctx.getLoadCommandIterator(); + var it = try ctx.getLoadCommandIterator(); var i: usize = 0; - while (it.next()) |cmd| { + while (try it.next()) |cmd| { try ObjectContext.dumpLoadCommand(cmd, i, writer); try writer.writeByte('\n'); @@ -1615,7 +1614,7 @@ const MachODumper = struct { .dyld_weak_bind, .dyld_lazy_bind, => { - const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse + const cmd = try ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse return step.fail("no dyld info found", .{}); const lc = cmd.cast(macho.dyld_info_command).?; @@ -1649,7 +1648,7 @@ const MachODumper = struct { }, .exports => blk: { - if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { + if (try ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { const lc = cmd.cast(macho.dyld_info_command).?; if (lc.export_size > 0) { const data = ctx.data[lc.export_off..][0..lc.export_size]; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 3f1982070c..182ea94766 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -21,6 +21,7 @@ const root = @import("root"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const ElfFile = @import("debug/ElfFile.zig"); +pub const 
MachOFile = @import("debug/MachOFile.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); pub const cpu_context = @import("debug/cpu_context.zig"); diff --git a/lib/std/debug/MachOFile.zig b/lib/std/debug/MachOFile.zig new file mode 100644 index 0000000000..b3b5789fe5 --- /dev/null +++ b/lib/std/debug/MachOFile.zig @@ -0,0 +1,501 @@ +mapped_memory: []align(std.heap.page_size_min) const u8, +symbols: []const Symbol, +strings: []const u8, +text_vmaddr: u64, + +/// Key is index into `strings` of the file path. +ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile), + +pub const Error = error{ + InvalidMachO, + InvalidDwarf, + MissingDebugInfo, + UnsupportedDebugInfo, + ReadFailed, + OutOfMemory, +}; + +pub fn deinit(mf: *MachOFile, gpa: Allocator) void { + for (mf.ofiles.values()) |*maybe_of| { + const of = &(maybe_of.* catch continue); + posix.munmap(of.mapped_memory); + of.dwarf.deinit(gpa); + of.symbols_by_name.deinit(gpa); + } + mf.ofiles.deinit(gpa); + gpa.free(mf.symbols); + posix.munmap(mf.mapped_memory); +} + +pub fn load(gpa: Allocator, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile { + switch (arch) { + .x86_64, .aarch64 => {}, + else => unreachable, + } + + const all_mapped_memory = try mapDebugInfoFile(path); + errdefer posix.munmap(all_mapped_memory); + + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 macOS and x86_64 macOS. + if (all_mapped_memory.len < 4) return error.InvalidMachO; + const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little); + + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. 
+ const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). + var fat_r: Io.Reader = .fixed(all_mapped_memory); + const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + const want_cpu_type = switch (arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => unreachable, + }; + for (0..hdr.nfat_arch) |_| { + const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (fat_arch.cputype != want_cpu_type) continue; + if (fat_arch.offset + fat_arch.size > all_mapped_memory.len) return error.InvalidMachO; + break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size]; + } + // `arch` was not present in the fat binary. + return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. 
+ macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, + + else => return error.InvalidMachO, + }; + + var r: Io.Reader = .fixed(mapped_macho); + const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidMachO; + + const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: { + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]); + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; + while (try it.next()) |cmd| switch (cmd.hdr.cmd) { + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, + else => {}, + }; + break :lcs .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; + }; + + const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; + + var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms); + defer symbols.deinit(gpa); + + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. 
+ var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms); + + var ofile: u32 = undefined; + var last_sym: Symbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..]); + for (0..symtab.nsyms) |_| { + const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = Symbol.unknown_ofile, + }); + } + }, + } + continue; + } + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidMachO, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .ofile = ofile, + }; + }, + else => return error.InvalidMachO, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + }, + else => return error.InvalidMachO, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + 
symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } + }, + else => return error.InvalidMachO, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidMachO, + }, + else => {}, + } + } + + switch (state) { + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, + .oso_close => {}, + else => return error.InvalidMachO, // corrupted STAB entries in symtab + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan); + + return .{ + .mapped_memory = all_mapped_memory, + .symbols = symbols_slice, + .strings = strings, + .ofiles = .empty, + .text_vmaddr = text_vmaddr, + }; +} +pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, vaddr: u64) !struct { *Dwarf, u64 } { + const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; + + if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo; + + // offset of `vaddr` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; + + // Take the symbol name from the N_FUN STAB entry; it is the key used + // below to find the matching symbol in the object file. + const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0); + + const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile); + if (!gop.found_existing) { + const name = mem.sliceTo(mf.strings[symbol.ofile..], 0); + gop.value_ptr.* = loadOFile(gpa, name); + } + const of = &(gop.value_ptr.* catch |err| return err); + + const symbol_index = of.symbols_by_name.getKeyAdapted( + @as([]const u8, stab_symbol), +
@as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }), + ) orelse return error.MissingDebugInfo; + + const symbol_ofile_vaddr = vaddr: { + var sym = of.symtab_raw[symbol_index]; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); + break :vaddr sym.n_value; + }; + + return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset }; +} +pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 { + const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; + return mem.sliceTo(mf.strings[symbol.strx..], 0); +} + +const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + dwarf: Dwarf, + strtab: []const u8, + symtab_raw: []align(1) const macho.nlist_64, + /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`. Accessed + /// through `SymbolAdapter`, so that the symbol name is used as the logical key. + symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), + + const SymbolAdapter = struct { + strtab: []const u8, + symtab_raw: []align(1) const macho.nlist_64, + pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, sym_name)); + } + pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { + _ = b_index; + var b_sym = ctx.symtab_raw[b_sym_index]; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &b_sym); + const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); + return mem.eql(u8, a_sym_name, b_sym_name); + } + }; +}; + +const Symbol = struct { + strx: u32, + addr: u64, + /// Value may be `unknown_ofile`. 
+ ofile: u32, + const unknown_ofile = std.math.maxInt(u32); + fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const Symbol, address: usize) ?*const Symbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const Symbol = &.{ + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = Symbol; +} + +fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapDebugInfoFile(o_file_path); + errdefer posix.munmap(mapped_mem); + + var r: Io.Reader = .fixed(mapped_mem); + const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return 
error.InvalidMachO, + }; + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_mem[@sizeOf(macho.mach_header_64)..]); + while (try it.next()) |lc| switch (lc.hdr.cmd) { + .SEGMENT_64 => seg_cmd = lc, + .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO; + const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; + defer symbols_by_name.deinit(gpa); + try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len)); + for (symtab_raw, 0..) 
|sym_raw, sym_index| { + var sym = sym_raw; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( + @as([]const u8, sym_name), + @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }), + ); + if (gop.found_existing) return error.InvalidMachO; + gop.key_ptr.* = @intCast(sym_index); + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect_raw| { + var sect = sect_raw; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, §); + + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidMachO; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null) + { + return error.MissingDebugInfo; + } + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + dwarf.open(gpa, .little) catch |err| switch (err) { + error.InvalidDebugInfo, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDwarf, + + error.MissingDebugInfo, + error.ReadFailed, + error.OutOfMemory, + => |e| return e, + }; + + return .{ + .mapped_memory = 
mapped_mem, + .dwarf = dwarf, + .strtab = strtab, + .symtab_raw = symtab_raw, + .symbols_by_name = symbols_by_name.move(), + }; +} + +/// Uses `mmap` to map the file at `path` into memory. +fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return error.ReadFailed, + }; + defer file.close(); + + const file_len = std.math.cast( + usize, + file.getEndPos() catch return error.ReadFailed, + ) orelse return error.ReadFailed; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ) catch return error.ReadFailed; +} + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Io = std.Io; +const assert = std.debug.assert; +const posix = std.posix; +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; + +const builtin = @import("builtin"); + +const MachOFile = @This(); diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig index f7eb4465c5..83adb6dcd4 100644 --- a/lib/std/debug/SelfInfo/MachO.zig +++ b/lib/std/debug/SelfInfo/MachO.zig @@ -1,12 +1,10 @@ mutex: std.Thread.Mutex, /// Accessed through `Module.Adapter`. 
modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false), -ofiles: std.StringArrayHashMapUnmanaged(?OFile), pub const init: SelfInfo = .{ .mutex = .{}, .modules = .empty, - .ofiles = .empty, }; pub fn deinit(si: *SelfInfo, gpa: Allocator) void { for (si.modules.keys()) |*module| { @@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void { const u = &(module.unwind orelse break :unwind catch break :unwind); if (u.dwarf) |*dwarf| dwarf.deinit(gpa); } - loaded: { - const l = &(module.loaded_macho orelse break :loaded catch break :loaded); - gpa.free(l.symbols); - posix.munmap(l.mapped_memory); + file: { + const f = &(module.file orelse break :file catch break :file); + f.deinit(gpa); } } - for (si.ofiles.values()) |*opt_ofile| { - const ofile = &(opt_ofile.* orelse continue); - ofile.dwarf.deinit(gpa); - ofile.symbols_by_name.deinit(gpa); - posix.munmap(ofile.mapped_memory); - } si.modules.deinit(gpa); - si.ofiles.deinit(gpa); } pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol { @@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st const module = try si.findModule(gpa, address); defer si.mutex.unlock(); - const loaded_macho = try module.getLoadedMachO(gpa); + const file = try module.getFile(gpa); - const vaddr = address - loaded_macho.vaddr_offset; - const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; + // This is not necessarily the same as the vmaddr_slide that dyld would report. This is + // because the segments in the file on disk might differ from the ones in memory. Normally + // we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + // it exists on disk (necessarily, because the kernel needs to load it!), but is also in + // the dyld cache (dyld actually restarts itself from cache after loading it), and the two + // versions have (very) different segment base addresses.
It's sort of like a large slide + // has been applied to all addresses in memory. For an optimal experience, we consider the + // on-disk vmaddr instead of the in-memory one. + const vaddr_offset = module.text_base - file.text_vmaddr; - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; + const vaddr = address - vaddr_offset; - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); - - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, + const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch { + // Return at least the symbol name if available. + return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - if (symbol.ofile == MachoSymbol.unknown_ofile) { - // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. - return sym_only_result; - } - - const o_file: *OFile = of: { - const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); - const gop = try si.ofiles.getOrPut(gpa, path); - if (!gop.found_existing) { - gop.value_ptr.* = loadOFile(gpa, path) catch null; - } - if (gop.value_ptr.*) |*o_file| { - break :of o_file; - } else { - return sym_only_result; - } + const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch { + // Return at least the symbol name if available. 
+ return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - const symbol_index = o_file.symbols_by_name.getKeyAdapted( - @as([]const u8, stab_symbol), - @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), - ) orelse return sym_only_result; - const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, + .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse + try file.lookupSymbolName(vaddr), .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, + ofile_dwarf, native_endian, std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), + ofile_dwarf.section(.debug_str), compile_unit, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, }, - .source_location = o_file.dwarf.getLineNumberInfo( + .source_location = ofile_dwarf.getLineNumberInfo( gpa, native_endian, compile_unit, - symbol_ofile_vaddr + address_symbol_offset, + ofile_vaddr, ) catch null, }; } @@ -447,7 +425,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module { .text_base = @intFromPtr(info.fbase), .name = std.mem.span(info.fname), .unwind = null, - .loaded_macho = null, + .file = null, }; } return gop.key_ptr; @@ -457,7 +435,7 @@ const Module = struct { text_base: usize, name: []const u8, unwind: ?(Error!Unwind), - loaded_macho: ?(Error!LoadedMachO), + file: ?(Error!MachOFile), const Adapter = struct { pub fn hash(_: Adapter, text_base: usize) u32 { @@ -488,34 +466,17 @@ const Module = struct { dwarf: ?Dwarf.Unwind, }; - const LoadedMachO = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: []const u8, - /// This is not necessarily the same as the 
vmaddr_slide that dyld would report. This is - /// because the segments in the file on disk might differ from the ones in memory. Normally - /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: - /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in - /// the dyld cache (dyld actually restart itself from cache after loading it), and the two - /// versions have (very) different segment base addresses. It's sort of like a large slide - /// has been applied to all addresses in memory. For an optimal experience, we consider the - /// on-disk vmaddr instead of the in-memory one. - vaddr_offset: usize, - }; - fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind { if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa); return if (module.unwind.?) |*unwind| unwind else |err| err; } fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections, const text_vmaddr = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; break .{ load_cmd.getSections(), segment_cmd.vmaddr }; @@ -568,235 +529,13 @@ const Module = struct { }; } - fn getLoadedMachO(module: *Module, gpa: Allocator) 
Error!*LoadedMachO { - if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, - else => error.ReadFailed, + fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile { + if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) { + error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo, + error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e, }; - return if (module.loaded_macho.?) |*lm| lm else |err| err; + return if (module.file.?) |*f| f else |err| err; } - fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO { - const all_mapped_memory = try mapDebugInfoFile(module.name); - errdefer posix.munmap(all_mapped_memory); - - // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal - // binary": a simple file format which contains Mach-O binaries for multiple targets. For - // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images - // for both ARM64 macOS and x86_64 macOS. - if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; - const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; - // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. - const mapped_macho = switch (magic) { - macho.MH_MAGIC_64 => all_mapped_memory, - - macho.FAT_CIGAM => mapped_macho: { - // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing - // is big-endian, so we'll be swapping some bytes. 
- if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; - const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); - const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); - const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; - const native_cpu_type = switch (builtin.cpu.arch) { - .x86_64 => macho.CPU_TYPE_X86_64, - .aarch64 => macho.CPU_TYPE_ARM64, - else => comptime unreachable, - }; - for (archs) |*arch| { - if (@byteSwap(arch.cputype) != native_cpu_type) continue; - const offset = @byteSwap(arch.offset); - const size = @byteSwap(arch.size); - break :mapped_macho all_mapped_memory[offset..][0..size]; - } - // Our native architecture was not present in the fat binary. - return error.MissingDebugInfo; - }, - - // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It - // will be fairly easy to add support here if necessary; it's very similar to above. 
- macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - - else => return error.InvalidDebugInfo, - }; - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var symtab: ?macho.symtab_command = null; - var text_vmaddr: ?u64 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { - if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; - text_vmaddr = seg_cmd.vmaddr; - }, - else => {}, - }; - break :lc_iter .{ - symtab orelse return error.MissingDebugInfo, - text_vmaddr orelse return error.MissingDebugInfo, - }; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - // This map is temporary; it is used only to detect duplicates here. This is - // necessary because we prefer to use STAB ("symbolic debugging table") symbols, - // but they might not be present, so we track normal symbols too. - // Indices match 1-1 with those of `symbols`. 
- var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; - defer symbol_names.deinit(gpa); - try symbol_names.ensureUnusedCapacity(gpa, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf, .pbud, .indr, .abs, _ => continue, - .sect => { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(.{ - .strx = sym.n_strx, - .addr = sym.n_value, - .ofile = MachoSymbol.unknown_ofile, - }); - } - }, - } - continue; - } - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[last_sym.strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(last_sym); - } else { - symbols.items[gop.index] = last_sym; - } - } - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = 
.oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => { - // Missing STAB symtab entries is still okay, unless there were also no normal symbols. - if (symbols.items.len == 0) return error.MissingDebugInfo; - }, - .oso_close => {}, - else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - return .{ - .mapped_memory = all_mapped_memory, - .symbols = symbols_slice, - .strings = strings, - .vaddr_offset = module.text_base - text_vmaddr, - }; - } -}; - -const OFile = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - dwarf: Dwarf, - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed - /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
- symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), - - const SymbolAdapter = struct { - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { - _ = ctx; - return @truncate(std.hash.Wyhash.hash(0, sym_name)); - } - pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { - _ = b_index; - const b_sym = ctx.symtab[b_sym_index]; - const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); - return mem.eql(u8, a_sym_name, b_sym_name); - } - }; }; const MachoSymbol = struct { @@ -880,101 +619,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 }; } -fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const 
strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; - defer symbols_by_name.deinit(gpa); - try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab, 0..) |sym, sym_index| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( - @as([]const u8, sym_name), - @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), - ); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.key_ptr.* = @intCast(sym_index); - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .mapped_memory = mapped_mem, - .dwarf = dwarf, - .strtab = strtab, - .symtab = symtab, - .symbols_by_name = symbols_by_name.move(), - }; -} - const std = @import("std"); const Io = std.Io; const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const Error = std.debug.SelfInfoError; +const MachOFile = std.debug.MachOFile; const assert = std.debug.assert; const posix = std.posix; const macho = std.macho; diff --git a/lib/std/macho.zig b/lib/std/macho.zig index d541e2d13e..7b8894e981 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1902,74 +1902,76 @@ pub const data_in_code_entry = extern struct { }; pub const LoadCommandIterator = struct { + next_index: usize, ncmds: usize, - buffer: []const u8, - index: usize = 0, + r: std.Io.Reader, pub const LoadCommand = struct { hdr: load_command, data: []const u8, - pub fn cmd(lc: LoadCommand) LC { - return lc.hdr.cmd; - } - - pub fn cmdsize(lc: LoadCommand) u32 { - return lc.hdr.cmdsize; - } - pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd { if (lc.data.len < @sizeOf(Cmd)) return null; - return @as(*align(1) const Cmd, @ptrCast(lc.data.ptr)).*; + const ptr: *align(1) const Cmd = @ptrCast(lc.data.ptr); 
+ var cmd = ptr.*; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(Cmd, &cmd); + return cmd; } /// Asserts LoadCommand is of type segment_command_64. + /// If the native endian is not `.little`, the `section_64` values must be byte-swapped by the caller. pub fn getSections(lc: LoadCommand) []align(1) const section_64 { const segment_lc = lc.cast(segment_command_64).?; - if (segment_lc.nsects == 0) return &[0]section_64{}; - const data = lc.data[@sizeOf(segment_command_64)..]; - const sections = @as([*]align(1) const section_64, @ptrCast(data.ptr))[0..segment_lc.nsects]; - return sections; + const sects_ptr: [*]align(1) const section_64 = @ptrCast(lc.data[@sizeOf(segment_command_64)..]); + return sects_ptr[0..segment_lc.nsects]; } /// Asserts LoadCommand is of type dylib_command. pub fn getDylibPathName(lc: LoadCommand) []const u8 { const dylib_lc = lc.cast(dylib_command).?; - const data = lc.data[dylib_lc.dylib.name..]; - return mem.sliceTo(data, 0); + return mem.sliceTo(lc.data[dylib_lc.dylib.name..], 0); } /// Asserts LoadCommand is of type rpath_command. pub fn getRpathPathName(lc: LoadCommand) []const u8 { const rpath_lc = lc.cast(rpath_command).?; - const data = lc.data[rpath_lc.path..]; - return mem.sliceTo(data, 0); + return mem.sliceTo(lc.data[rpath_lc.path..], 0); } /// Asserts LoadCommand is of type build_version_command. + /// If the native endian is not `.little`, the `build_tool_version` values must be byte-swapped by the caller. 
pub fn getBuildVersionTools(lc: LoadCommand) []align(1) const build_tool_version { const build_lc = lc.cast(build_version_command).?; - const ntools = build_lc.ntools; - if (ntools == 0) return &[0]build_tool_version{}; - const data = lc.data[@sizeOf(build_version_command)..]; - const tools = @as([*]align(1) const build_tool_version, @ptrCast(data.ptr))[0..ntools]; - return tools; + const tools_ptr: [*]align(1) const build_tool_version = @ptrCast(lc.data[@sizeOf(build_version_command)..]); + return tools_ptr[0..build_lc.ntools]; } }; - pub fn next(it: *LoadCommandIterator) ?LoadCommand { - if (it.index >= it.ncmds) return null; + pub fn next(it: *LoadCommandIterator) error{InvalidMachO}!?LoadCommand { + if (it.next_index >= it.ncmds) return null; - const hdr = @as(*align(1) const load_command, @ptrCast(it.buffer.ptr)).*; - const cmd = LoadCommand{ - .hdr = hdr, - .data = it.buffer[0..hdr.cmdsize], + const hdr = it.r.peekStruct(load_command, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + const data = it.r.take(hdr.cmdsize) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, }; - it.buffer = it.buffer[hdr.cmdsize..]; - it.index += 1; + it.next_index += 1; + return .{ .hdr = hdr, .data = data }; + } - return cmd; + pub fn init(hdr: *const mach_header_64, cmds_buf_overlong: []const u8) error{InvalidMachO}!LoadCommandIterator { + if (cmds_buf_overlong.len < hdr.sizeofcmds) return error.InvalidMachO; + if (hdr.ncmds > 0 and hdr.sizeofcmds < @sizeOf(load_command)) return error.InvalidMachO; + const cmds_buf = cmds_buf_overlong[0..hdr.sizeofcmds]; + return .{ + .next_index = 0, + .ncmds = hdr.ncmds, + .r = .fixed(cmds_buf), + }; } }; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8a3ee07315..7c6708983c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4167,7 +4167,7 @@ pub const Platform = struct { /// Using 
Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to /// the extracted minimum platform version. pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { - switch (lc.cmd()) { + switch (lc.hdr.cmd) { .BUILD_VERSION => { const cmd = lc.cast(macho.build_version_command).?; return .{ @@ -4200,7 +4200,7 @@ pub const Platform = struct { // We can't distinguish Mac Catalyst here, but this is legacy stuff anyway. const cmd = lc.cast(macho.version_min_command).?; return .{ - .os_tag = switch (lc.cmd()) { + .os_tag = switch (lc.hdr.cmd) { .VERSION_MIN_IPHONEOS => .ios, .VERSION_MIN_MACOSX => .macos, .VERSION_MIN_TVOS => .tvos, diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index c78d52f815..64817ac433 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -90,11 +90,8 @@ fn parseBinary(self: *Dylib, macho_file: *MachO) !void { if (amt != lc_buffer.len) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |cmd| switch (cmd.cmd()) { + var it = LoadCommandIterator.init(&header, lc_buffer) catch |err| std.debug.panic("bad dylib: {t}", .{err}); + while (it.next() catch |err| std.debug.panic("bad dylib: {t}", .{err})) |cmd| switch (cmd.hdr.cmd) { .ID_DYLIB => { self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); }, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7cec09ba91..5f28d3dfda 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -109,11 +109,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { if (amt != self.header.?.sizeofcmds) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |lc| switch (lc.cmd()) { + var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err}); + while 
(it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) { .SEGMENT_64 => { const sections = lc.getSections(); try self.sections.ensureUnusedCapacity(gpa, sections.len); @@ -1644,11 +1641,8 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void { if (amt != self.header.?.sizeofcmds) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |lc| switch (lc.cmd()) { + var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err}); + while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) { .SYMTAB => { const cmd = lc.cast(macho.symtab_command).?; try self.strtab.resize(gpa, cmd.strsize); From 0a330d4f947c1b05ac9f7d624443e2f80db2912f Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 12:19:22 +0000 Subject: [PATCH 2/8] std.debug.Info: basic Mach-O support --- lib/std/Build/Fuzz.zig | 29 ++++++++++++-- lib/std/debug/Info.zig | 91 ++++++++++++++++++++++++++++++------------ tools/dump-cov.zig | 35 ++++++++++++---- 3 files changed, 117 insertions(+), 38 deletions(-) diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 6dd4f70f9f..7a22522d6b 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -383,7 +383,14 @@ fn prepareTables(fuzz: *Fuzz, run_step: *Step.Run, coverage_id: u64) error{ OutO errdefer gop.value_ptr.coverage.deinit(fuzz.gpa); const rebuilt_exe_path = run_step.rebuilt_executable.?; - var debug_info = std.debug.Info.load(fuzz.gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + const target = run_step.producer.?.rootModuleTarget(); + var debug_info = std.debug.Info.load( + fuzz.gpa, + rebuilt_exe_path, + &gop.value_ptr.coverage, + target.ofmt, + target.cpu.arch, + ) catch |err| { log.err("step '{s}': failed to load debug information for '{f}': {s}", .{ run_step.step.name, rebuilt_exe_path, 
@errorName(err), }); @@ -479,9 +486,23 @@ fn addEntryPoint(fuzz: *Fuzz, coverage_id: u64, addr: u64) error{ AlreadyReporte if (false) { const sl = coverage_map.source_locations[index]; const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename); - log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{ - addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1], - }); + if (pcs.len == 1) { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index 0 (final)", .{ + addr, file_name, sl.line, sl.column, + }); + } else if (index == 0) { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index 0 before {x}", .{ + addr, file_name, sl.line, sl.column, pcs[index + 1], + }); + } else if (index == pcs.len - 1) { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} (final) after {x}", .{ + addr, file_name, sl.line, sl.column, index, pcs[index - 1], + }); + } else { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{ + addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1], + }); + } } try coverage_map.entry_points.append(fuzz.gpa, @intCast(index)); } diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 74119a3ea4..921cd36ab8 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -9,49 +9,67 @@ const std = @import("../std.zig"); const Allocator = std.mem.Allocator; const Path = std.Build.Cache.Path; -const ElfFile = std.debug.ElfFile; const assert = std.debug.assert; const Coverage = std.debug.Coverage; const SourceLocation = std.debug.Coverage.SourceLocation; +const ElfFile = std.debug.ElfFile; +const MachOFile = std.debug.MachOFile; + const Info = @This(); -/// Sorted by key, ascending. 
-address_map: std.AutoArrayHashMapUnmanaged(u64, ElfFile), +impl: union(enum) { + elf: ElfFile, + macho: MachOFile, +}, /// Externally managed, outlives this `Info` instance. coverage: *Coverage, -pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || std.debug.Dwarf.ScanError || error{MissingDebugInfo}; +pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || MachOFile.Error || std.debug.Dwarf.ScanError || error{ MissingDebugInfo, UnsupportedDebugInfo }; -pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { - var file = try path.root_dir.handle.openFile(path.sub_path, .{}); - defer file.close(); +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage, format: std.Target.ObjectFormat, arch: std.Target.Cpu.Arch) LoadError!Info { + switch (format) { + .elf => { + var file = try path.root_dir.handle.openFile(path.sub_path, .{}); + defer file.close(); - var elf_file: ElfFile = try .load(gpa, file, null, &.none); - errdefer elf_file.deinit(gpa); + var elf_file: ElfFile = try .load(gpa, file, null, &.none); + errdefer elf_file.deinit(gpa); - if (elf_file.dwarf == null) return error.MissingDebugInfo; - try elf_file.dwarf.?.open(gpa, elf_file.endian); - try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); + if (elf_file.dwarf == null) return error.MissingDebugInfo; + try elf_file.dwarf.?.open(gpa, elf_file.endian); + try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); - var info: Info = .{ - .address_map = .{}, - .coverage = coverage, - }; - try info.address_map.put(gpa, 0, elf_file); - errdefer comptime unreachable; // elf_file is owned by the map now - return info; + return .{ + .impl = .{ .elf = elf_file }, + .coverage = coverage, + }; + }, + .macho => { + const path_str = try path.toString(gpa); + defer gpa.free(path_str); + + var macho_file: MachOFile = try .load(gpa, path_str, arch); + errdefer macho_file.deinit(gpa); + + return .{ + .impl = .{ .macho = macho_file }, + .coverage = coverage, + }; + }, 
+ else => return error.UnsupportedDebugInfo, + } } pub fn deinit(info: *Info, gpa: Allocator) void { - for (info.address_map.values()) |*elf_file| { - elf_file.dwarf.?.deinit(gpa); + switch (info.impl) { + .elf => |*ef| ef.deinit(gpa), + .macho => |*mf| mf.deinit(gpa), } - info.address_map.deinit(gpa); info.* = undefined; } -pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError || error{UnsupportedDebugInfo}; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations. @@ -64,7 +82,28 @@ pub fn resolveAddresses( output: []SourceLocation, ) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); - if (info.address_map.entries.len != 1) @panic("TODO"); - const elf_file = &info.address_map.values()[0]; - return info.coverage.resolveAddressesDwarf(gpa, elf_file.endian, sorted_pc_addrs, output, &elf_file.dwarf.?); + switch (info.impl) { + .elf => |*ef| return info.coverage.resolveAddressesDwarf(gpa, ef.endian, sorted_pc_addrs, output, &ef.dwarf.?), + .macho => |*mf| { + // Resolving all of the addresses at once unfortunately isn't so easy in Mach-O binaries + // due to split debug information. For now, we'll just resolve the addresses one by one.
+ for (sorted_pc_addrs, output) |pc_addr, *src_loc| { + const dwarf, const dwarf_pc_addr = mf.getDwarfForAddress(gpa, pc_addr) catch |err| switch (err) { + error.InvalidMachO, error.InvalidDwarf => return error.InvalidDebugInfo, + else => |e| return e, + }; + if (dwarf.ranges.items.len == 0) { + dwarf.populateRanges(gpa, .little) catch |err| switch (err) { + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.ReadFailed, + => return error.InvalidDebugInfo, + else => |e| return e, + }; + } + try info.coverage.resolveAddressesDwarf(gpa, .little, &.{dwarf_pc_addr}, src_loc[0..1], dwarf); + } + }, + } } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index 249783b927..3dd91de612 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -8,31 +8,50 @@ const assert = std.debug.assert; const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader; pub fn main() !void { - var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init; - defer _ = general_purpose_allocator.deinit(); - const gpa = general_purpose_allocator.allocator(); + var debug_allocator: std.heap.DebugAllocator(.{}) = .init; + defer _ = debug_allocator.deinit(); + const gpa = debug_allocator.allocator(); - var arena_instance = std.heap.ArenaAllocator.init(gpa); + var arena_instance: std.heap.ArenaAllocator = .init(gpa); defer arena_instance.deinit(); const arena = arena_instance.allocator(); + var threaded: std.Io.Threaded = .init(gpa); + defer threaded.deinit(); + const io = threaded.io(); + const args = try std.process.argsAlloc(arena); + + const target_query_str = switch (args.len) { + 3 => "native", + 4 => args[3], + else => return fatal( + \\usage: {0s} path/to/exe path/to/coverage [target] + \\ if omitted, 'target' defaults to 'native' + \\ example: {0s} zig-out/test .zig-cache/v/xxxxxxxx x86_64-linux + , .{if (args.len == 0) "dump-cov" else args[0]}), + }; + + const target = std.zig.resolveTargetQueryOrFatal(io, try .parse(.{ + .arch_os_abi = target_query_str, + })); 
+ const exe_file_name = args[1]; const cov_file_name = args[2]; const exe_path: Path = .{ - .root_dir = std.Build.Cache.Directory.cwd(), + .root_dir = .cwd(), .sub_path = exe_file_name, }; const cov_path: Path = .{ - .root_dir = std.Build.Cache.Directory.cwd(), + .root_dir = .cwd(), .sub_path = cov_file_name, }; - var coverage = std.debug.Coverage.init; + var coverage: std.debug.Coverage = .init; defer coverage.deinit(gpa); - var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { + var debug_info = std.debug.Info.load(gpa, exe_path, &coverage, target.ofmt, target.cpu.arch) catch |err| { fatal("failed to load debug info for {f}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); From 010dcd6a9b64d5bd13579a4b0c4c70a5aee5c967 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 12:55:17 +0000 Subject: [PATCH 3/8] fuzzer: account for runtime address slide This is relevant to PIEs, which are notably enabled by default on macOS. The build system needs to only see virtual addresses, that is, those which do not have the slide applied; but the fuzzer itself naturally sees relocated addresses (i.e. with the slide applied). We just need to subtract the slide when we communicate addresses to the build system. 
--- lib/compiler/test_runner.zig | 2 +- lib/fuzzer.zig | 34 ++++++++++++++++++++++++------ lib/std/Build/abi.zig | 1 + lib/std/debug.zig | 2 +- lib/std/debug/SelfInfo/Elf.zig | 5 +++++ lib/std/debug/SelfInfo/MachO.zig | 14 ++++++++++++ lib/std/debug/SelfInfo/Windows.zig | 6 ++++++ 7 files changed, 56 insertions(+), 8 deletions(-) diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 0d6f451947..054fe1eb27 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -184,7 +184,7 @@ fn mainServer() !void { const test_fn = builtin.test_functions[index]; const entry_addr = @intFromPtr(test_fn.func); - try server.serveU64Message(.fuzz_start_addr, entry_addr); + try server.serveU64Message(.fuzz_start_addr, fuzz_abi.fuzzer_unslide_address(entry_addr)); defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1); is_fuzz_test = false; fuzz_test_index = index; diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 6b7a846e4c..5c452340f6 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -116,13 +116,18 @@ const Executable = struct { "failed to init memory map for coverage file '{s}': {t}", .{ &coverage_file_name, e }, ); - map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{ + map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{ .n_runs = 0, .unique_runs = 0, .pcs_len = pcs.len, })); map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize)); - map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs)); + // Relocations have been applied to `pcs` so it contains runtime addresses (with slide + // applied). We need to translate these to the virtual addresses as on disk. 
+ for (pcs) |pc| { + const pc_vaddr = fuzzer_unslide_address(pc); + map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr)); + } return map; } else { const size = coverage_file.getEndPos() catch |e| panic( @@ -215,7 +220,16 @@ const Executable = struct { .{ self.pc_counters.len, pcs.len }, ); - self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs)); + self.pc_digest = digest: { + // Relocations have been applied to `pcs` so it contains runtime addresses (with slide + // applied). We need to translate these to the virtual addresses as on disk. + var h: std.hash.Wyhash = .init(0); + for (pcs) |pc| { + const pc_vaddr = fuzzer_unslide_address(pc); + h.update(@ptrCast(&pc_vaddr)); + } + break :digest h.final(); + }; self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest); return self; @@ -622,6 +636,14 @@ export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void { } } +export fn fuzzer_unslide_address(addr: usize) usize { + const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported"); + const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| { + std.debug.panic("failed to find virtual address slide: {t}", .{err}); + }; + return addr - slide; +} + /// Helps determine run uniqueness in the face of recursion. /// Currently not used by the fuzzer. 
export threadlocal var __sancov_lowest_stack: usize = 0; @@ -1185,13 +1207,13 @@ const Mutation = enum { const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len); out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]); }, - .@"const" => out.appendSliceAssumeCapacity(mem.asBytes( + .@"const" => out.appendSliceAssumeCapacity(@ptrCast( &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)], )), - .small => out.appendSliceAssumeCapacity(mem.asBytes( + .small => out.appendSliceAssumeCapacity(@ptrCast( &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]), )), - .few => out.appendSliceAssumeCapacity(mem.asBytes( + .few => out.appendSliceAssumeCapacity(@ptrCast( &fewValue(rng, data_ctx[0], data_ctx[1]), )), } diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig index eb8f6cb1be..b7c1e7379d 100644 --- a/lib/std/Build/abi.zig +++ b/lib/std/Build/abi.zig @@ -145,6 +145,7 @@ pub const fuzz = struct { pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void; pub extern fn fuzzer_new_input(bytes: Slice) void; pub extern fn fuzzer_main(limit_kind: LimitKind, amount: u64) void; + pub extern fn fuzzer_unslide_address(addr: usize) usize; pub const Slice = extern struct { ptr: [*]const u8, diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 182ea94766..29c0731f4e 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1367,7 +1367,7 @@ test printLineFromFile { /// The returned allocator should be thread-safe if the compilation is multi-threaded, because /// multiple threads could capture and/or print stack traces simultaneously. -fn getDebugInfoAllocator() Allocator { +pub fn getDebugInfoAllocator() Allocator { // Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`. 
if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) { return root.debug.getDebugInfoAllocator(); diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig index 5036d40197..59c0b42451 100644 --- a/lib/std/debug/SelfInfo/Elf.zig +++ b/lib/std/debug/SelfInfo/Elf.zig @@ -80,6 +80,11 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons if (module.name.len == 0) return error.MissingDebugInfo; return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + const module = try si.findModule(gpa, address, .shared); + defer si.rwlock.unlockShared(); + return module.load_offset; +} pub const can_unwind: bool = s: { // The DWARF code can't deal with ILP32 ABIs yet: https://github.com/ziglang/zig/issues/25447 diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig index 83adb6dcd4..94d50bbf77 100644 --- a/lib/std/debug/SelfInfo/MachO.zig +++ b/lib/std/debug/SelfInfo/MachO.zig @@ -82,6 +82,20 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons defer si.mutex.unlock(); return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + const module = try si.findModule(gpa, address); + defer si.mutex.unlock(); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break segment_cmd.vmaddr; + } else unreachable; + return module.text_base - text_vmaddr; +} pub const can_unwind: bool = true; pub const UnwindContext = 
std.debug.Dwarf.SelfUnwinder; diff --git a/lib/std/debug/SelfInfo/Windows.zig b/lib/std/debug/SelfInfo/Windows.zig index 70009217db..306287a9e7 100644 --- a/lib/std/debug/SelfInfo/Windows.zig +++ b/lib/std/debug/SelfInfo/Windows.zig @@ -33,6 +33,12 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons const module = try si.findModule(gpa, address); return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + si.mutex.lock(); + defer si.mutex.unlock(); + const module = try si.findModule(gpa, address); + return module.base_address; +} pub const can_unwind: bool = switch (builtin.cpu.arch) { else => true, From e1fa4011fbe812dc7d05326239a62388b591ba52 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 13:05:03 +0000 Subject: [PATCH 4/8] fuzz: hack around unknown module structure --- lib/build-web/fuzz.zig | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/build-web/fuzz.zig b/lib/build-web/fuzz.zig index c694e9e69e..44f1434517 100644 --- a/lib/build-web/fuzz.zig +++ b/lib/build-web/fuzz.zig @@ -228,20 +228,21 @@ fn unpackSourcesInner(tar_bytes: []u8) !void { if (std.mem.endsWith(u8, tar_file.name, ".zig")) { log.debug("found file: '{s}'", .{tar_file.name}); const file_name = try gpa.dupe(u8, tar_file.name); - if (std.mem.indexOfScalar(u8, file_name, '/')) |pkg_name_end| { - const pkg_name = file_name[0..pkg_name_end]; - const gop = try Walk.modules.getOrPut(gpa, pkg_name); - const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len); - if (!gop.found_existing or - std.mem.eql(u8, file_name[pkg_name_end..], "/root.zig") or - std.mem.eql(u8, file_name[pkg_name_end + 1 .. 
file_name.len - ".zig".len], pkg_name)) - { - gop.value_ptr.* = file; - } - const file_bytes = tar_reader.take(@intCast(tar_file.size)) catch unreachable; - it.unread_file_bytes = 0; // we have read the whole thing - assert(file == try Walk.add_file(file_name, file_bytes)); - } + // This is a hack to guess modules from the tar file contents. To handle modules + // properly, the build system will need to change the structure here to have one + // directory per module. This in turn requires compiler enhancements to allow + // the build system to actually discover the required information. + const mod_name, const is_module_root = p: { + if (std.mem.find(u8, file_name, "std/")) |i| break :p .{ "std", std.mem.eql(u8, file_name[i + 4 ..], "std.zig") }; + if (std.mem.endsWith(u8, file_name, "/builtin.zig")) break :p .{ "builtin", true }; + break :p .{ "root", std.mem.endsWith(u8, file_name, "/root.zig") }; + }; + const gop = try Walk.modules.getOrPut(gpa, mod_name); + const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len); + if (!gop.found_existing or is_module_root) gop.value_ptr.* = file; + const file_bytes = tar_reader.take(@intCast(tar_file.size)) catch unreachable; + it.unread_file_bytes = 0; // we have read the whole thing + assert(file == try Walk.add_file(file_name, file_bytes)); } else { log.warn("skipping: '{s}' - the tar creation should have done that", .{tar_file.name}); } From 0f06b5b58387eacb53448835cbcbca10cca1559e Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 14:33:42 +0000 Subject: [PATCH 5/8] std.debug.MachOFile: handle 'path/to/archive.a(entry.o)' form --- lib/std/debug/MachOFile.zig | 73 ++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/lib/std/debug/MachOFile.zig b/lib/std/debug/MachOFile.zig index b3b5789fe5..3be1b1daff 100644 --- a/lib/std/debug/MachOFile.zig +++ b/lib/std/debug/MachOFile.zig @@ -356,11 +356,58 @@ test { _ = Symbol; } -fn loadOFile(gpa: Allocator, 
o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); +fn loadOFile(gpa: Allocator, o_file_name: []const u8) !OFile { + const all_mapped_memory, const mapped_ofile = map: { + const open_paren = paren: { + if (std.mem.endsWith(u8, o_file_name, ")")) { + if (std.mem.findScalarLast(u8, o_file_name, '(')) |i| { + break :paren i; + } + } + // Not an archive, just a normal path to a .o file + const m = try mapDebugInfoFile(o_file_name); + break :map .{ m, m }; + }; - var r: Io.Reader = .fixed(mapped_mem); + // We have the form 'path/to/archive.a(entry.o)'. Map the archive and find the object file in question. + + const archive_path = o_file_name[0..open_paren]; + const target_name_in_archive = o_file_name[open_paren + 1 .. o_file_name.len - 1]; + const mapped_archive = try mapDebugInfoFile(archive_path); + errdefer posix.munmap(mapped_archive); + + var ar_reader: Io.Reader = .fixed(mapped_archive); + const ar_magic = ar_reader.take(8) catch return error.InvalidMachO; + if (!std.mem.eql(u8, ar_magic, "!<arch>\n")) return error.InvalidMachO; + while (true) { + if (ar_reader.seek == ar_reader.buffer.len) return error.MissingDebugInfo; + + const raw_name = ar_reader.takeArray(16) catch return error.InvalidMachO; + ar_reader.discardAll(12 + 6 + 6 + 8) catch return error.InvalidMachO; + const raw_size = ar_reader.takeArray(10) catch return error.InvalidMachO; + const file_magic = ar_reader.takeArray(2) catch return error.InvalidMachO; + if (!std.mem.eql(u8, file_magic, "`\n")) return error.InvalidMachO; + + const size = std.fmt.parseInt(u32, mem.sliceTo(raw_size, ' '), 10) catch return error.InvalidMachO; + const raw_data = ar_reader.take(size) catch return error.InvalidMachO; + + const entry_name: []const u8, const entry_contents: []const u8 = entry: { + if (!std.mem.startsWith(u8, raw_name, "#1/")) { + break :entry .{ mem.sliceTo(raw_name, '/'), raw_data }; + } + const len = std.fmt.parseInt(u32, 
mem.sliceTo(raw_name[3..], ' '), 10) catch return error.InvalidMachO; + if (len > size) return error.InvalidMachO; + break :entry .{ mem.sliceTo(raw_data[0..len], 0), raw_data[len..] }; + }; + + if (std.mem.eql(u8, entry_name, target_name_in_archive)) { + break :map .{ mapped_archive, entry_contents }; + } + } + }; + errdefer posix.munmap(all_mapped_memory); + + var r: Io.Reader = .fixed(mapped_ofile); const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { error.ReadFailed => unreachable, error.EndOfStream => return error.InvalidMachO, @@ -370,7 +417,7 @@ fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = try .init(&hdr, mapped_mem[@sizeOf(macho.mach_header_64)..]); + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_ofile[@sizeOf(macho.mach_header_64)..]); while (try it.next()) |lc| switch (lc.hdr.cmd) { .SEGMENT_64 => seg_cmd = lc, .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO, @@ -382,13 +429,13 @@ fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { }; }; - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + if (mapped_ofile.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO; + if (mapped_ofile[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO; + const strtab = mapped_ofile[symtab_cmd.stroff..][0 .. 
symtab_cmd.strsize - 1]; const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO; - const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + if (mapped_ofile.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO; + const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_ofile[symtab_cmd.symoff..][0..n_sym_bytes]); // TODO handle tentative (common) symbols var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; @@ -423,8 +470,8 @@ fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; } else continue; - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidMachO; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + if (mapped_ofile.len < sect.offset + sect.size) return error.InvalidMachO; + const section_bytes = mapped_ofile[sect.offset..][0..sect.size]; sections[section_index] = .{ .data = section_bytes, .owned = false, @@ -455,7 +502,7 @@ fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { }; return .{ - .mapped_memory = mapped_mem, + .mapped_memory = all_mapped_memory, .dwarf = dwarf, .strtab = strtab, .symtab_raw = symtab_raw, From bc524a2b1a6e5cd13c0093bed240b06d23e1a882 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 15:29:21 +0000 Subject: [PATCH 6/8] std.Build: fix crashes running fuzz tests --- lib/std/Build/Step/Compile.zig | 5 +++++ lib/std/Build/Step/Run.zig | 13 +++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 7dd9c17273..4ea09538c2 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -1932,6 +1932,11 @@ pub fn rebuildInFuzzMode(c: *Compile, gpa: Allocator, progress_node: std.Progres 
c.step.result_error_bundle.deinit(gpa); c.step.result_error_bundle = std.zig.ErrorBundle.empty; + if (c.step.result_failed_command) |cmd| { + gpa.free(cmd); + c.step.result_failed_command = null; + } + const zig_args = try getZigArgs(c, true); const maybe_output_bin_path = try c.step.evalZigProcess(zig_args, progress_node, false, null, gpa); return maybe_output_bin_path.?; diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index 52a690ef94..dfae77ffb5 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -1140,6 +1140,12 @@ pub fn rerunInFuzzMode( .output_file, .output_directory => unreachable, } } + + if (run.step.result_failed_command) |cmd| { + fuzz.gpa.free(cmd); + run.step.result_failed_command = null; + } + const has_side_effects = false; const rand_int = std.crypto.random.int(u64); const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int); @@ -1150,7 +1156,7 @@ pub fn rerunInFuzzMode( .web_server = null, // only needed for time reports .ttyconf = fuzz.ttyconf, .unit_test_timeout_ns = null, // don't time out fuzz tests for now - .gpa = undefined, // not used by `runCommand` + .gpa = fuzz.gpa, }, .{ .unit_test_index = unit_test_index, .fuzz = fuzz, @@ -1870,7 +1876,10 @@ fn pollZigTest( // test. For instance, if the test runner leaves this much time between us requesting a test to // start and it acknowledging the test starting, we terminate the child and raise an error. This // *should* never happen, but could in theory be caused by some very unlucky IB in a test. 
- const response_timeout_ns = @max(options.unit_test_timeout_ns orelse 0, 60 * std.time.ns_per_s); + const response_timeout_ns: ?u64 = ns: { + if (fuzz_context != null) break :ns null; // don't timeout fuzz tests + break :ns @max(options.unit_test_timeout_ns orelse 0, 60 * std.time.ns_per_s); + }; const stdout = poller.reader(.stdout); const stderr = poller.reader(.stderr); From b05fefb9c9bf3065e353102d0e54ea76d5f4e34d Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 17:08:49 +0000 Subject: [PATCH 7/8] std.http: stop assuming previous chunk state The full file may not be written, either due to a previous chunk being in-progress when `sendFile` was called, or due to `limit`. --- lib/std/http.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/std/http.zig b/lib/std/http.zig index dcc96ba741..eedd729576 100644 --- a/lib/std/http.zig +++ b/lib/std/http.zig @@ -962,6 +962,7 @@ pub const BodyWriter = struct { // have to flush the chunk header before knowing the chunk length. 
return error.Unimplemented; }; + if (data_len == 0) return error.EndOfStream; const out = bw.http_protocol_output; l: switch (bw.state.chunk_len) { 0 => { @@ -975,8 +976,7 @@ pub const BodyWriter = struct { 2 => { try out.writeAll("\r\n"); bw.state.chunk_len = 0; - assert(file_reader.atEnd()); - return error.EndOfStream; + continue :l 0; }, else => { const chunk_limit: std.Io.Limit = .limited(bw.state.chunk_len - 2); @@ -985,8 +985,7 @@ pub const BodyWriter = struct { else try out.write(chunk_limit.slice(w.buffered())); bw.state.chunk_len -= n; - const ret = w.consume(n); - return ret; + return w.consume(n); }, } } From a87b5332319b20347916f77e57253e2df4b2a3af Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 17:11:41 +0000 Subject: [PATCH 8/8] std.Io.Writer: fix some bugs --- lib/std/Io/Writer.zig | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/std/Io/Writer.zig b/lib/std/Io/Writer.zig index d0721307d4..d8468083eb 100644 --- a/lib/std/Io/Writer.zig +++ b/lib/std/Io/Writer.zig @@ -270,16 +270,17 @@ fn writeSplatHeaderLimitFinish( remaining -= copy_len; if (remaining == 0) break :v; } - for (data[0 .. data.len - 1]) |buf| if (buf.len != 0) { - const copy_len = @min(header.len, remaining); - vecs[i] = buf; + for (data[0 .. 
data.len - 1]) |buf| { + if (buf.len == 0) continue; + const copy_len = @min(buf.len, remaining); + vecs[i] = buf[0..copy_len]; i += 1; remaining -= copy_len; if (remaining == 0) break :v; if (vecs.len - i == 0) break :v; - }; + } const pattern = data[data.len - 1]; - if (splat == 1) { + if (splat == 1 or remaining < pattern.len) { vecs[i] = pattern[0..@min(remaining, pattern.len)]; i += 1; break :v; @@ -915,7 +916,16 @@ pub fn sendFileHeader( if (new_end <= w.buffer.len) { @memcpy(w.buffer[w.end..][0..header.len], header); w.end = new_end; - return header.len + try w.vtable.sendFile(w, file_reader, limit); + const file_bytes = w.vtable.sendFile(w, file_reader, limit) catch |err| switch (err) { + error.ReadFailed, error.WriteFailed => |e| return e, + error.EndOfStream, error.Unimplemented => |e| { + // These errors are non-fatal, so if we wrote any header bytes, we will report that + // and suppress this error. Only if there was no header may we return the error. + if (header.len != 0) return header.len; + return e; + }, + }; + return header.len + file_bytes; } const buffered_contents = limit.slice(file_reader.interface.buffered()); const n = try w.vtable.drain(w, &.{ header, buffered_contents }, 1);