From 0caca625ebad92495a758e3121c91ba1f32774dd Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Wed, 19 Nov 2025 12:01:49 +0000 Subject: [PATCH] std.debug: split up Mach-O debug info handling Like ELF, we now have `std.debug.MachOFile` for the host-independent parts, and `std.debug.SelfInfo.MachO` for logic requiring the file to correspond to the running program. --- lib/std/Build/Step/CheckObject.zig | 29 +- lib/std/debug.zig | 1 + lib/std/debug/MachOFile.zig | 501 +++++++++++++++++++++++++++++ lib/std/debug/SelfInfo/MachO.zig | 444 +++---------------------- lib/std/macho.zig | 68 ++-- src/link/MachO.zig | 4 +- src/link/MachO/Dylib.zig | 7 +- src/link/MachO/Object.zig | 14 +- 8 files changed, 606 insertions(+), 462 deletions(-) create mode 100644 lib/std/debug/MachOFile.zig diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index d9935d4f3d..ab10d368b2 100644 --- a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -729,10 +729,10 @@ const MachODumper = struct { imports: std.ArrayListUnmanaged([]const u8) = .empty, fn parse(ctx: *ObjectContext) !void { - var it = ctx.getLoadCommandIterator(); + var it = try ctx.getLoadCommandIterator(); var i: usize = 0; - while (it.next()) |cmd| { - switch (cmd.cmd()) { + while (try it.next()) |cmd| { + switch (cmd.hdr.cmd) { .SEGMENT_64 => { const seg = cmd.cast(macho.segment_command_64).?; try ctx.segments.append(ctx.gpa, seg); @@ -771,14 +771,13 @@ const MachODumper = struct { return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0); } - fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator { - const data = ctx.data[@sizeOf(macho.mach_header_64)..][0..ctx.header.sizeofcmds]; - return .{ .ncmds = ctx.header.ncmds, .buffer = data }; + fn getLoadCommandIterator(ctx: ObjectContext) !macho.LoadCommandIterator { + return .init(&ctx.header, ctx.data[@sizeOf(macho.mach_header_64)..]); } - fn getLoadCommand(ctx: ObjectContext, cmd: 
macho.LC) ?macho.LoadCommandIterator.LoadCommand { - var it = ctx.getLoadCommandIterator(); - while (it.next()) |lc| if (lc.cmd() == cmd) { + fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) !?macho.LoadCommandIterator.LoadCommand { + var it = try ctx.getLoadCommandIterator(); + while (try it.next()) |lc| if (lc.hdr.cmd == cmd) { return lc; }; return null; @@ -872,9 +871,9 @@ const MachODumper = struct { \\LC {d} \\cmd {s} \\cmdsize {d} - , .{ index, @tagName(lc.cmd()), lc.cmdsize() }); + , .{ index, @tagName(lc.hdr.cmd), lc.hdr.cmdsize }); - switch (lc.cmd()) { + switch (lc.hdr.cmd) { .SEGMENT_64 => { const seg = lc.cast(macho.segment_command_64).?; try writer.writeByte('\n'); @@ -1592,9 +1591,9 @@ const MachODumper = struct { .headers => { try ObjectContext.dumpHeader(ctx.header, writer); - var it = ctx.getLoadCommandIterator(); + var it = try ctx.getLoadCommandIterator(); var i: usize = 0; - while (it.next()) |cmd| { + while (try it.next()) |cmd| { try ObjectContext.dumpLoadCommand(cmd, i, writer); try writer.writeByte('\n'); @@ -1615,7 +1614,7 @@ const MachODumper = struct { .dyld_weak_bind, .dyld_lazy_bind, => { - const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse + const cmd = try ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse return step.fail("no dyld info found", .{}); const lc = cmd.cast(macho.dyld_info_command).?; @@ -1649,7 +1648,7 @@ const MachODumper = struct { }, .exports => blk: { - if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { + if (try ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { const lc = cmd.cast(macho.dyld_info_command).?; if (lc.export_size > 0) { const data = ctx.data[lc.export_off..][0..lc.export_size]; diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 3f1982070c..182ea94766 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -21,6 +21,7 @@ const root = @import("root"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const ElfFile = @import("debug/ElfFile.zig"); +pub const 
MachOFile = @import("debug/MachOFile.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); pub const cpu_context = @import("debug/cpu_context.zig"); diff --git a/lib/std/debug/MachOFile.zig b/lib/std/debug/MachOFile.zig new file mode 100644 index 0000000000..b3b5789fe5 --- /dev/null +++ b/lib/std/debug/MachOFile.zig @@ -0,0 +1,501 @@ +mapped_memory: []align(std.heap.page_size_min) const u8, +symbols: []const Symbol, +strings: []const u8, +text_vmaddr: u64, + +/// Key is index into `strings` of the file path. +ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile), + +pub const Error = error{ + InvalidMachO, + InvalidDwarf, + MissingDebugInfo, + UnsupportedDebugInfo, + ReadFailed, + OutOfMemory, +}; + +pub fn deinit(mf: *MachOFile, gpa: Allocator) void { + for (mf.ofiles.values()) |*maybe_of| { + const of = &(maybe_of.* catch continue); + posix.munmap(of.mapped_memory); + of.dwarf.deinit(gpa); + of.symbols_by_name.deinit(gpa); + } + mf.ofiles.deinit(gpa); + gpa.free(mf.symbols); + posix.munmap(mf.mapped_memory); +} + +pub fn load(gpa: Allocator, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile { + switch (arch) { + .x86_64, .aarch64 => {}, + else => unreachable, + } + + const all_mapped_memory = try mapDebugInfoFile(path); + errdefer posix.munmap(all_mapped_memory); + + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 macOS and x86_64 macOS. + if (all_mapped_memory.len < 4) return error.InvalidMachO; + const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little); + + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. 
+ const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). + var fat_r: Io.Reader = .fixed(all_mapped_memory); + const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + const want_cpu_type = switch (arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => unreachable, + }; + for (0..hdr.nfat_arch) |_| { + const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (fat_arch.cputype != want_cpu_type) continue; + if (fat_arch.offset + fat_arch.size > all_mapped_memory.len) return error.InvalidMachO; + break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size]; + } + // `arch` was not present in the fat binary. + return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. 
+ macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, + + else => return error.InvalidMachO, + }; + + var r: Io.Reader = .fixed(mapped_macho); + const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidMachO; + + const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: { + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]); + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; + while (try it.next()) |cmd| switch (cmd.hdr.cmd) { + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, + else => {}, + }; + break :lcs .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; + }; + + const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; + + var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms); + defer symbols.deinit(gpa); + + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. 
+ var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms); + + var ofile: u32 = undefined; + var last_sym: Symbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..]); + for (0..symtab.nsyms) |_| { + const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = Symbol.unknown_ofile, + }); + } + }, + } + continue; + } + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidMachO, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .ofile = ofile, + }; + }, + else => return error.InvalidMachO, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + }, + else => return error.InvalidMachO, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + 
symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } + }, + else => return error.InvalidMachO, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidMachO, + }, + else => {}, + } + } + + switch (state) { + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, + .oso_close => {}, + else => return error.InvalidMachO, // corrupted STAB entries in symtab + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan); + + return .{ + .mapped_memory = all_mapped_memory, + .symbols = symbols_slice, + .strings = strings, + .ofiles = .empty, + .text_vmaddr = text_vmaddr, + }; +} +pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, vaddr: u64) !struct { *Dwarf, u64 } { + const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; + + if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo; + + // offset of `address` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if we fail to find the DWARF infos + const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0); + + const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile); + if (!gop.found_existing) { + const name = mem.sliceTo(mf.strings[symbol.ofile..], 0); + gop.value_ptr.* = loadOFile(gpa, name); + } + const of = &(gop.value_ptr.* catch |err| return err); + + const symbol_index = of.symbols_by_name.getKeyAdapted( + @as([]const u8, stab_symbol), + 
@as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }), + ) orelse return error.MissingDebugInfo; + + const symbol_ofile_vaddr = vaddr: { + var sym = of.symtab_raw[symbol_index]; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); + break :vaddr sym.n_value; + }; + + return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset }; +} +pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 { + const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; + return mem.sliceTo(mf.strings[symbol.strx..], 0); +} + +const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + dwarf: Dwarf, + strtab: []const u8, + symtab_raw: []align(1) const macho.nlist_64, + /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`. Accessed + /// through `SymbolAdapter`, so that the symbol name is used as the logical key. + symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), + + const SymbolAdapter = struct { + strtab: []const u8, + symtab_raw: []align(1) const macho.nlist_64, + pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, sym_name)); + } + pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { + _ = b_index; + var b_sym = ctx.symtab_raw[b_sym_index]; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &b_sym); + const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); + return mem.eql(u8, a_sym_name, b_sym_name); + } + }; +}; + +const Symbol = struct { + strx: u32, + addr: u64, + /// Value may be `unknown_ofile`. 
+ ofile: u32, + const unknown_ofile = std.math.maxInt(u32); + fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const Symbol, address: usize) ?*const Symbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const Symbol = &.{ + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = Symbol; +} + +fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapDebugInfoFile(o_file_path); + errdefer posix.munmap(mapped_mem); + + var r: Io.Reader = .fixed(mapped_mem); + const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return 
error.InvalidMachO, + }; + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_mem[@sizeOf(macho.mach_header_64)..]); + while (try it.next()) |lc| switch (lc.hdr.cmd) { + .SEGMENT_64 => seg_cmd = lc, + .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO; + const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; + defer symbols_by_name.deinit(gpa); + try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len)); + for (symtab_raw, 0..) 
|sym_raw, sym_index| { + var sym = sym_raw; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( + @as([]const u8, sym_name), + @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }), + ); + if (gop.found_existing) return error.InvalidMachO; + gop.key_ptr.* = @intCast(sym_index); + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect_raw| { + var sect = sect_raw; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, &sect); + + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidMachO; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null) + { + return error.MissingDebugInfo; + } + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + dwarf.open(gpa, .little) catch |err| switch (err) { + error.InvalidDebugInfo, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDwarf, + + error.MissingDebugInfo, + error.ReadFailed, + error.OutOfMemory, + => |e| return e, + }; + + return .{ + .mapped_memory = 
mapped_mem, + .dwarf = dwarf, + .strtab = strtab, + .symtab_raw = symtab_raw, + .symbols_by_name = symbols_by_name.move(), + }; +} + +/// Uses `mmap` to map the file at `path` into memory. +fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return error.ReadFailed, + }; + defer file.close(); + + const file_len = std.math.cast( + usize, + file.getEndPos() catch return error.ReadFailed, + ) orelse return error.ReadFailed; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ) catch return error.ReadFailed; +} + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Io = std.Io; +const assert = std.debug.assert; +const posix = std.posix; +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; + +const builtin = @import("builtin"); + +const MachOFile = @This(); diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig index f7eb4465c5..83adb6dcd4 100644 --- a/lib/std/debug/SelfInfo/MachO.zig +++ b/lib/std/debug/SelfInfo/MachO.zig @@ -1,12 +1,10 @@ mutex: std.Thread.Mutex, /// Accessed through `Module.Adapter`. 
modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false), -ofiles: std.StringArrayHashMapUnmanaged(?OFile), pub const init: SelfInfo = .{ .mutex = .{}, .modules = .empty, - .ofiles = .empty, }; pub fn deinit(si: *SelfInfo, gpa: Allocator) void { for (si.modules.keys()) |*module| { @@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void { const u = &(module.unwind orelse break :unwind catch break :unwind); if (u.dwarf) |*dwarf| dwarf.deinit(gpa); } - loaded: { - const l = &(module.loaded_macho orelse break :loaded catch break :loaded); - gpa.free(l.symbols); - posix.munmap(l.mapped_memory); + file: { + const f = &(module.file orelse break :file catch break :file); + f.deinit(gpa); } } - for (si.ofiles.values()) |*opt_ofile| { - const ofile = &(opt_ofile.* orelse continue); - ofile.dwarf.deinit(gpa); - ofile.symbols_by_name.deinit(gpa); - posix.munmap(ofile.mapped_memory); - } si.modules.deinit(gpa); - si.ofiles.deinit(gpa); } pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol { @@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st const module = try si.findModule(gpa, address); defer si.mutex.unlock(); - const loaded_macho = try module.getLoadedMachO(gpa); + const file = try module.getFile(gpa); - const vaddr = address - loaded_macho.vaddr_offset; - const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; + // This is not necessarily the same as the vmaddr_slide that dyld would report. This is + // because the segments in the file on disk might differ from the ones in memory. Normally + // we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + // it exists on disk (necessarily, because the kernel needs to load it!), but is also in + // the dyld cache (dyld actually restarts itself from cache after loading it), and the two + // versions have (very) different segment base addresses. 
It's sort of like a large slide + // has been applied to all addresses in memory. For an optimal experience, we consider the + // on-disk vmaddr instead of the in-memory one. + const vaddr_offset = module.text_base - file.text_vmaddr; - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; + const vaddr = address - vaddr_offset; - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); - - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, + const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch { + // Return at least the symbol name if available. + return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - if (symbol.ofile == MachoSymbol.unknown_ofile) { - // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. - return sym_only_result; - } - - const o_file: *OFile = of: { - const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); - const gop = try si.ofiles.getOrPut(gpa, path); - if (!gop.found_existing) { - gop.value_ptr.* = loadOFile(gpa, path) catch null; - } - if (gop.value_ptr.*) |*o_file| { - break :of o_file; - } else { - return sym_only_result; - } + const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch { + // Return at least the symbol name if available. 
+ return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - const symbol_index = o_file.symbols_by_name.getKeyAdapted( - @as([]const u8, stab_symbol), - @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), - ) orelse return sym_only_result; - const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, + .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse + try file.lookupSymbolName(vaddr), .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, + ofile_dwarf, native_endian, std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), + ofile_dwarf.section(.debug_str), compile_unit, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, }, - .source_location = o_file.dwarf.getLineNumberInfo( + .source_location = ofile_dwarf.getLineNumberInfo( gpa, native_endian, compile_unit, - symbol_ofile_vaddr + address_symbol_offset, + ofile_vaddr, ) catch null, }; } @@ -447,7 +425,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module { .text_base = @intFromPtr(info.fbase), .name = std.mem.span(info.fname), .unwind = null, - .loaded_macho = null, + .file = null, }; } return gop.key_ptr; @@ -457,7 +435,7 @@ const Module = struct { text_base: usize, name: []const u8, unwind: ?(Error!Unwind), - loaded_macho: ?(Error!LoadedMachO), + file: ?(Error!MachOFile), const Adapter = struct { pub fn hash(_: Adapter, text_base: usize) u32 { @@ -488,34 +466,17 @@ const Module = struct { dwarf: ?Dwarf.Unwind, }; - const LoadedMachO = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: []const u8, - /// This is not necessarily the same as the 
vmaddr_slide that dyld would report. This is - /// because the segments in the file on disk might differ from the ones in memory. Normally - /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: - /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in - /// the dyld cache (dyld actually restart itself from cache after loading it), and the two - /// versions have (very) different segment base addresses. It's sort of like a large slide - /// has been applied to all addresses in memory. For an optimal experience, we consider the - /// on-disk vmaddr instead of the in-memory one. - vaddr_offset: usize, - }; - fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind { if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa); return if (module.unwind.?) |*unwind| unwind else |err| err; } fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections, const text_vmaddr = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; break .{ load_cmd.getSections(), segment_cmd.vmaddr }; @@ -568,235 +529,13 @@ const Module = struct { }; } - fn getLoadedMachO(module: *Module, gpa: Allocator) 
Error!*LoadedMachO { - if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, - else => error.ReadFailed, + fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile { + if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) { + error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo, + error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e, }; - return if (module.loaded_macho.?) |*lm| lm else |err| err; + return if (module.file.?) |*f| f else |err| err; } - fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO { - const all_mapped_memory = try mapDebugInfoFile(module.name); - errdefer posix.munmap(all_mapped_memory); - - // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal - // binary": a simple file format which contains Mach-O binaries for multiple targets. For - // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images - // for both ARM64 macOS and x86_64 macOS. - if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; - const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; - // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. - const mapped_macho = switch (magic) { - macho.MH_MAGIC_64 => all_mapped_memory, - - macho.FAT_CIGAM => mapped_macho: { - // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing - // is big-endian, so we'll be swapping some bytes. 
- if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; - const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); - const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); - const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; - const native_cpu_type = switch (builtin.cpu.arch) { - .x86_64 => macho.CPU_TYPE_X86_64, - .aarch64 => macho.CPU_TYPE_ARM64, - else => comptime unreachable, - }; - for (archs) |*arch| { - if (@byteSwap(arch.cputype) != native_cpu_type) continue; - const offset = @byteSwap(arch.offset); - const size = @byteSwap(arch.size); - break :mapped_macho all_mapped_memory[offset..][0..size]; - } - // Our native architecture was not present in the fat binary. - return error.MissingDebugInfo; - }, - - // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It - // will be fairly easy to add support here if necessary; it's very similar to above. 
- macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - - else => return error.InvalidDebugInfo, - }; - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var symtab: ?macho.symtab_command = null; - var text_vmaddr: ?u64 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { - if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; - text_vmaddr = seg_cmd.vmaddr; - }, - else => {}, - }; - break :lc_iter .{ - symtab orelse return error.MissingDebugInfo, - text_vmaddr orelse return error.MissingDebugInfo, - }; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - // This map is temporary; it is used only to detect duplicates here. This is - // necessary because we prefer to use STAB ("symbolic debugging table") symbols, - // but they might not be present, so we track normal symbols too. - // Indices match 1-1 with those of `symbols`. 
- var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; - defer symbol_names.deinit(gpa); - try symbol_names.ensureUnusedCapacity(gpa, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf, .pbud, .indr, .abs, _ => continue, - .sect => { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(.{ - .strx = sym.n_strx, - .addr = sym.n_value, - .ofile = MachoSymbol.unknown_ofile, - }); - } - }, - } - continue; - } - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[last_sym.strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(last_sym); - } else { - symbols.items[gop.index] = last_sym; - } - } - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = 
.oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => { - // Missing STAB symtab entries is still okay, unless there were also no normal symbols. - if (symbols.items.len == 0) return error.MissingDebugInfo; - }, - .oso_close => {}, - else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - return .{ - .mapped_memory = all_mapped_memory, - .symbols = symbols_slice, - .strings = strings, - .vaddr_offset = module.text_base - text_vmaddr, - }; - } -}; - -const OFile = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - dwarf: Dwarf, - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed - /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
- symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), - - const SymbolAdapter = struct { - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { - _ = ctx; - return @truncate(std.hash.Wyhash.hash(0, sym_name)); - } - pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { - _ = b_index; - const b_sym = ctx.symtab[b_sym_index]; - const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); - return mem.eql(u8, a_sym_name, b_sym_name); - } - }; }; const MachoSymbol = struct { @@ -880,101 +619,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 }; } -fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const 
strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; - defer symbols_by_name.deinit(gpa); - try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab, 0..) |sym, sym_index| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( - @as([]const u8, sym_name), - @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), - ); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.key_ptr.* = @intCast(sym_index); - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .mapped_memory = mapped_mem, - .dwarf = dwarf, - .strtab = strtab, - .symtab = symtab, - .symbols_by_name = symbols_by_name.move(), - }; -} - const std = @import("std"); const Io = std.Io; const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const Error = std.debug.SelfInfoError; +const MachOFile = std.debug.MachOFile; const assert = std.debug.assert; const posix = std.posix; const macho = std.macho; diff --git a/lib/std/macho.zig b/lib/std/macho.zig index d541e2d13e..7b8894e981 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1902,74 +1902,76 @@ pub const data_in_code_entry = extern struct { }; pub const LoadCommandIterator = struct { + next_index: usize, ncmds: usize, - buffer: []const u8, - index: usize = 0, + r: std.Io.Reader, pub const LoadCommand = struct { hdr: load_command, data: []const u8, - pub fn cmd(lc: LoadCommand) LC { - return lc.hdr.cmd; - } - - pub fn cmdsize(lc: LoadCommand) u32 { - return lc.hdr.cmdsize; - } - pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd { if (lc.data.len < @sizeOf(Cmd)) return null; - return @as(*align(1) const Cmd, @ptrCast(lc.data.ptr)).*; + const ptr: *align(1) const Cmd = @ptrCast(lc.data.ptr); 
+ var cmd = ptr.*; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(Cmd, &cmd); + return cmd; } /// Asserts LoadCommand is of type segment_command_64. + /// If the native endian is not `.little`, the `section_64` values must be byte-swapped by the caller. pub fn getSections(lc: LoadCommand) []align(1) const section_64 { const segment_lc = lc.cast(segment_command_64).?; - if (segment_lc.nsects == 0) return &[0]section_64{}; - const data = lc.data[@sizeOf(segment_command_64)..]; - const sections = @as([*]align(1) const section_64, @ptrCast(data.ptr))[0..segment_lc.nsects]; - return sections; + const sects_ptr: [*]align(1) const section_64 = @ptrCast(lc.data[@sizeOf(segment_command_64)..]); + return sects_ptr[0..segment_lc.nsects]; } /// Asserts LoadCommand is of type dylib_command. pub fn getDylibPathName(lc: LoadCommand) []const u8 { const dylib_lc = lc.cast(dylib_command).?; - const data = lc.data[dylib_lc.dylib.name..]; - return mem.sliceTo(data, 0); + return mem.sliceTo(lc.data[dylib_lc.dylib.name..], 0); } /// Asserts LoadCommand is of type rpath_command. pub fn getRpathPathName(lc: LoadCommand) []const u8 { const rpath_lc = lc.cast(rpath_command).?; - const data = lc.data[rpath_lc.path..]; - return mem.sliceTo(data, 0); + return mem.sliceTo(lc.data[rpath_lc.path..], 0); } /// Asserts LoadCommand is of type build_version_command. + /// If the native endian is not `.little`, the `build_tool_version` values must be byte-swapped by the caller. 
pub fn getBuildVersionTools(lc: LoadCommand) []align(1) const build_tool_version { const build_lc = lc.cast(build_version_command).?; - const ntools = build_lc.ntools; - if (ntools == 0) return &[0]build_tool_version{}; - const data = lc.data[@sizeOf(build_version_command)..]; - const tools = @as([*]align(1) const build_tool_version, @ptrCast(data.ptr))[0..ntools]; - return tools; + const tools_ptr: [*]align(1) const build_tool_version = @ptrCast(lc.data[@sizeOf(build_version_command)..]); + return tools_ptr[0..build_lc.ntools]; } }; - pub fn next(it: *LoadCommandIterator) ?LoadCommand { - if (it.index >= it.ncmds) return null; + pub fn next(it: *LoadCommandIterator) error{InvalidMachO}!?LoadCommand { + if (it.next_index >= it.ncmds) return null; - const hdr = @as(*align(1) const load_command, @ptrCast(it.buffer.ptr)).*; - const cmd = LoadCommand{ - .hdr = hdr, - .data = it.buffer[0..hdr.cmdsize], + const hdr = it.r.peekStruct(load_command, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + const data = it.r.take(hdr.cmdsize) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, }; - it.buffer = it.buffer[hdr.cmdsize..]; - it.index += 1; + it.next_index += 1; + return .{ .hdr = hdr, .data = data }; + } - return cmd; + pub fn init(hdr: *const mach_header_64, cmds_buf_overlong: []const u8) error{InvalidMachO}!LoadCommandIterator { + if (cmds_buf_overlong.len < hdr.sizeofcmds) return error.InvalidMachO; + if (hdr.ncmds > 0 and hdr.sizeofcmds < @sizeOf(load_command)) return error.InvalidMachO; + const cmds_buf = cmds_buf_overlong[0..hdr.sizeofcmds]; + return .{ + .next_index = 0, + .ncmds = hdr.ncmds, + .r = .fixed(cmds_buf), + }; } }; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8a3ee07315..7c6708983c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4167,7 +4167,7 @@ pub const Platform = struct { /// Using 
Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to /// the extracted minimum platform version. pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { - switch (lc.cmd()) { + switch (lc.hdr.cmd) { .BUILD_VERSION => { const cmd = lc.cast(macho.build_version_command).?; return .{ @@ -4200,7 +4200,7 @@ pub const Platform = struct { // We can't distinguish Mac Catalyst here, but this is legacy stuff anyway. const cmd = lc.cast(macho.version_min_command).?; return .{ - .os_tag = switch (lc.cmd()) { + .os_tag = switch (lc.hdr.cmd) { .VERSION_MIN_IPHONEOS => .ios, .VERSION_MIN_MACOSX => .macos, .VERSION_MIN_TVOS => .tvos, diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index c78d52f815..64817ac433 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -90,11 +90,8 @@ fn parseBinary(self: *Dylib, macho_file: *MachO) !void { if (amt != lc_buffer.len) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |cmd| switch (cmd.cmd()) { + var it = LoadCommandIterator.init(&header, lc_buffer) catch |err| std.debug.panic("bad dylib: {t}", .{err}); + while (it.next() catch |err| std.debug.panic("bad dylib: {t}", .{err})) |cmd| switch (cmd.hdr.cmd) { .ID_DYLIB => { self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); }, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7cec09ba91..5f28d3dfda 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -109,11 +109,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { if (amt != self.header.?.sizeofcmds) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |lc| switch (lc.cmd()) { + var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err}); + while 
(it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) { .SEGMENT_64 => { const sections = lc.getSections(); try self.sections.ensureUnusedCapacity(gpa, sections.len); @@ -1644,11 +1641,8 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void { if (amt != self.header.?.sizeofcmds) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |lc| switch (lc.cmd()) { + var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err}); + while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) { .SYMTAB => { const cmd = lc.cast(macho.symtab_command).?; try self.strtab.resize(gpa, cmd.strsize);