mapped_memory: []align(std.heap.page_size_min) const u8, symbols: []const Symbol, strings: []const u8, text_vmaddr: u64, /// Key is index into `strings` of the file path. ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile), pub const Error = error{ InvalidMachO, InvalidDwarf, MissingDebugInfo, UnsupportedDebugInfo, ReadFailed, OutOfMemory, }; pub fn deinit(mf: *MachOFile, gpa: Allocator) void { for (mf.ofiles.values()) |*maybe_of| { const of = &(maybe_of.* catch continue); posix.munmap(of.mapped_memory); of.dwarf.deinit(gpa); of.symbols_by_name.deinit(gpa); } mf.ofiles.deinit(gpa); gpa.free(mf.symbols); posix.munmap(mf.mapped_memory); } pub fn load(gpa: Allocator, io: Io, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile { switch (arch) { .x86_64, .aarch64 => {}, else => unreachable, } const all_mapped_memory = try mapDebugInfoFile(io, path); errdefer posix.munmap(all_mapped_memory); // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal // binary": a simple file format which contains Mach-O binaries for multiple targets. For // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images // for both ARM64 macOS and x86_64 macOS. if (all_mapped_memory.len < 4) return error.InvalidMachO; const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little); // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. const mapped_macho = switch (magic) { macho.MH_MAGIC_64 => all_mapped_memory, macho.FAT_CIGAM => mapped_macho: { // This is the universal binary format (aka a "fat binary"). var fat_r: Io.Reader = .fixed(all_mapped_memory); const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) { error.ReadFailed => unreachable, error.EndOfStream => return error.InvalidMachO, }; const want_cpu_type = switch (arch) { .x86_64 => macho.CPU_TYPE_X86_64, .aarch64 => macho.CPU_TYPE_ARM64, else => unreachable, }; for (0..hdr.nfat_arch) |_| { const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) { error.ReadFailed => unreachable, error.EndOfStream => return error.InvalidMachO, }; if (fat_arch.cputype != want_cpu_type) continue; if (fat_arch.offset + fat_arch.size > all_mapped_memory.len) return error.InvalidMachO; break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size]; } // `arch` was not present in the fat binary. return error.MissingDebugInfo; }, // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It // will be fairly easy to add support here if necessary; it's very similar to above. macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, else => return error.InvalidMachO, }; var r: Io.Reader = .fixed(mapped_macho); const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { error.ReadFailed => unreachable, error.EndOfStream => return error.InvalidMachO, }; if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidMachO; const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: { var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]); var symtab: ?macho.symtab_command = null; var text_vmaddr: ?u64 = null; while (try it.next()) |cmd| switch (cmd.hdr.cmd) { .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO, .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; text_vmaddr = seg_cmd.vmaddr; }, else => {}, }; break :lcs .{ symtab orelse return error.MissingDebugInfo, text_vmaddr orelse return error.MissingDebugInfo, }; }; const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms); defer symbols.deinit(gpa); // This map is temporary; it is used only to detect duplicates here. This is // necessary because we prefer to use STAB ("symbolic debugging table") symbols, // but they might not be present, so we track normal symbols too. // Indices match 1-1 with those of `symbols`. var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; defer symbol_names.deinit(gpa); try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms); var ofile: u32 = undefined; var last_sym: Symbol = undefined; var state: enum { init, oso_open, oso_close, bnsym, fun_strx, fun_size, ensym, } = .init; var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..]); for (0..symtab.nsyms) |_| { const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) { error.ReadFailed => unreachable, error.EndOfStream => return error.InvalidMachO, }; if (sym.n_type.bits.is_stab == 0) { if (sym.n_strx == 0) continue; switch (sym.n_type.bits.type) { .undf, .pbud, .indr, .abs, _ => continue, .sect => { const name = std.mem.sliceTo(strings[sym.n_strx..], 0); const gop = symbol_names.getOrPutAssumeCapacity(name); if (!gop.found_existing) { assert(gop.index == symbols.items.len); symbols.appendAssumeCapacity(.{ .strx = sym.n_strx, .addr = sym.n_value, .ofile = Symbol.unknown_ofile, }); } }, } continue; } // TODO handle globals N_GSYM, and statics N_STSYM switch (sym.n_type.stab) { .oso => switch (state) { .init, .oso_close => { state = .oso_open; ofile = sym.n_strx; }, else => return error.InvalidMachO, }, .bnsym => switch (state) { .oso_open, .ensym => { state = .bnsym; last_sym = .{ .strx = 0, .addr = sym.n_value, .ofile = ofile, }; }, else => return error.InvalidMachO, }, .fun => switch (state) { .bnsym => { state = .fun_strx; last_sym.strx = sym.n_strx; }, .fun_strx => { state = .fun_size; }, else => return error.InvalidMachO, }, .ensym => switch (state) { .fun_size => { state = .ensym; if (last_sym.strx != 0) { const name = std.mem.sliceTo(strings[last_sym.strx..], 0); const gop = symbol_names.getOrPutAssumeCapacity(name); if (!gop.found_existing) { assert(gop.index == symbols.items.len); symbols.appendAssumeCapacity(last_sym); } else { symbols.items[gop.index] = last_sym; } } }, else => return error.InvalidMachO, }, .so => switch (state) { .init, .oso_close => {}, .oso_open, .ensym => { state = .oso_close; }, else => return error.InvalidMachO, }, else => {}, } } switch (state) { .init => { // Missing STAB symtab entries is still okay, unless there were also no normal symbols. if (symbols.items.len == 0) return error.MissingDebugInfo; }, .oso_close => {}, else => return error.InvalidMachO, // corrupted STAB entries in symtab } const symbols_slice = try symbols.toOwnedSlice(gpa); errdefer gpa.free(symbols_slice); // Even though lld emits symbols in ascending order, this debug code // should work for programs linked in any valid way. // This sort is so that we can binary search later. mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan); return .{ .mapped_memory = all_mapped_memory, .symbols = symbols_slice, .strings = strings, .ofiles = .empty, .text_vmaddr = text_vmaddr, }; } pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, io: Io, vaddr: u64) !struct { *Dwarf, u64 } { const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo; // offset of `address` from start of `symbol` const address_symbol_offset = vaddr - symbol.addr; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0); const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile); if (!gop.found_existing) { const name = mem.sliceTo(mf.strings[symbol.ofile..], 0); gop.value_ptr.* = loadOFile(gpa, io, name); } const of = &(gop.value_ptr.* catch |err| return err); const symbol_index = of.symbols_by_name.getKeyAdapted( @as([]const u8, stab_symbol), @as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }), ) orelse return error.MissingDebugInfo; const symbol_ofile_vaddr = vaddr: { var sym = of.symtab_raw[symbol_index]; if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); break :vaddr sym.n_value; }; return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset }; } pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 { const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; return mem.sliceTo(mf.strings[symbol.strx..], 0); } const OFile = struct { mapped_memory: []align(std.heap.page_size_min) const u8, dwarf: Dwarf, strtab: []const u8, symtab_raw: []align(1) const macho.nlist_64, /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`. Accessed /// through `SymbolAdapter`, so that the symbol name is used as the logical key. symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), const SymbolAdapter = struct { strtab: []const u8, symtab_raw: []align(1) const macho.nlist_64, pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { _ = ctx; return @truncate(std.hash.Wyhash.hash(0, sym_name)); } pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { _ = b_index; var b_sym = ctx.symtab_raw[b_sym_index]; if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &b_sym); const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); return mem.eql(u8, a_sym_name, b_sym_name); } }; }; const Symbol = struct { strx: u32, addr: u64, /// Value may be `unknown_ofile`. ofile: u32, const unknown_ofile = std.math.maxInt(u32); fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool { _ = context; return lhs.addr < rhs.addr; } /// Assumes that `symbols` is sorted in order of ascending `addr`. fn find(symbols: []const Symbol, address: usize) ?*const Symbol { if (symbols.len == 0) return null; // no potential match if (address < symbols[0].addr) return null; // address is before the lowest-address symbol var left: usize = 0; var len: usize = symbols.len; while (len > 1) { const mid = left + len / 2; if (address < symbols[mid].addr) { len /= 2; } else { left = mid; len -= len / 2; } } return &symbols[left]; } test find { const symbols: []const Symbol = &.{ .{ .addr = 100, .strx = undefined, .ofile = undefined }, .{ .addr = 200, .strx = undefined, .ofile = undefined }, .{ .addr = 300, .strx = undefined, .ofile = undefined }, }; try testing.expectEqual(null, find(symbols, 0)); try testing.expectEqual(null, find(symbols, 99)); try testing.expectEqual(&symbols[0], find(symbols, 100).?); try testing.expectEqual(&symbols[0], find(symbols, 150).?); try testing.expectEqual(&symbols[0], find(symbols, 199).?); try testing.expectEqual(&symbols[1], find(symbols, 200).?); try testing.expectEqual(&symbols[1], find(symbols, 250).?); try testing.expectEqual(&symbols[1], find(symbols, 299).?); try testing.expectEqual(&symbols[2], find(symbols, 300).?); try testing.expectEqual(&symbols[2], find(symbols, 301).?); try testing.expectEqual(&symbols[2], find(symbols, 5000).?); } }; test { _ = Symbol; } fn loadOFile(gpa: Allocator, io: Io, o_file_name: []const u8) !OFile { const all_mapped_memory, const mapped_ofile = map: { const open_paren = paren: { if (std.mem.endsWith(u8, o_file_name, ")")) { if (std.mem.findScalarLast(u8, o_file_name, '(')) |i| { break :paren i; } } // Not an archive, just a normal path to a .o file const m = try mapDebugInfoFile(io, o_file_name); break :map .{ m, m }; }; // We have the form 'path/to/archive.a(entry.o)'. Map the archive and find the object file in question. const archive_path = o_file_name[0..open_paren]; const target_name_in_archive = o_file_name[open_paren + 1 .. o_file_name.len - 1]; const mapped_archive = try mapDebugInfoFile(io, archive_path); errdefer posix.munmap(mapped_archive); var ar_reader: Io.Reader = .fixed(mapped_archive); const ar_magic = ar_reader.take(8) catch return error.InvalidMachO; if (!std.mem.eql(u8, ar_magic, "!\n")) return error.InvalidMachO; while (true) { if (ar_reader.seek == ar_reader.buffer.len) return error.MissingDebugInfo; const raw_name = ar_reader.takeArray(16) catch return error.InvalidMachO; ar_reader.discardAll(12 + 6 + 6 + 8) catch return error.InvalidMachO; const raw_size = ar_reader.takeArray(10) catch return error.InvalidMachO; const file_magic = ar_reader.takeArray(2) catch return error.InvalidMachO; if (!std.mem.eql(u8, file_magic, "`\n")) return error.InvalidMachO; const size = std.fmt.parseInt(u32, mem.sliceTo(raw_size, ' '), 10) catch return error.InvalidMachO; const raw_data = ar_reader.take(size) catch return error.InvalidMachO; const entry_name: []const u8, const entry_contents: []const u8 = entry: { if (!std.mem.startsWith(u8, raw_name, "#1/")) { break :entry .{ mem.sliceTo(raw_name, '/'), raw_data }; } const len = std.fmt.parseInt(u32, mem.sliceTo(raw_name[3..], ' '), 10) catch return error.InvalidMachO; if (len > size) return error.InvalidMachO; break :entry .{ mem.sliceTo(raw_data[0..len], 0), raw_data[len..] }; }; if (std.mem.eql(u8, entry_name, target_name_in_archive)) { break :map .{ mapped_archive, entry_contents }; } } }; errdefer posix.munmap(all_mapped_memory); var r: Io.Reader = .fixed(mapped_ofile); const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { error.ReadFailed => unreachable, error.EndOfStream => return error.InvalidMachO, }; if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO; const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; var symtab_cmd: ?macho.symtab_command = null; var it: macho.LoadCommandIterator = try .init(&hdr, mapped_ofile[@sizeOf(macho.mach_header_64)..]); while (try it.next()) |lc| switch (lc.hdr.cmd) { .SEGMENT_64 => seg_cmd = lc, .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO, else => {}, }; break :cmds .{ seg_cmd orelse return error.MissingDebugInfo, symtab_cmd orelse return error.MissingDebugInfo, }; }; if (mapped_ofile.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO; if (mapped_ofile[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO; const strtab = mapped_ofile[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); if (mapped_ofile.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO; const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_ofile[symtab_cmd.symoff..][0..n_sym_bytes]); // TODO handle tentative (common) symbols var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; defer symbols_by_name.deinit(gpa); try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len)); for (symtab_raw, 0..) |sym_raw, sym_index| { var sym = sym_raw; if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); if (sym.n_strx == 0) continue; switch (sym.n_type.bits.type) { .undf => continue, // includes tentative symbols .abs => continue, else => {}, } const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( @as([]const u8, sym_name), @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }), ); if (gop.found_existing) return error.InvalidMachO; gop.key_ptr.* = @intCast(sym_index); } var sections: Dwarf.SectionArray = @splat(null); for (seg_cmd.getSections()) |sect_raw| { var sect = sect_raw; if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, §); if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; } else continue; if (mapped_ofile.len < sect.offset + sect.size) return error.InvalidMachO; const section_bytes = mapped_ofile[sect.offset..][0..sect.size]; sections[section_index] = .{ .data = section_bytes, .owned = false, }; } if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null) { return error.MissingDebugInfo; } var dwarf: Dwarf = .{ .sections = sections }; errdefer dwarf.deinit(gpa); dwarf.open(gpa, .little) catch |err| switch (err) { error.InvalidDebugInfo, error.EndOfStream, error.Overflow, error.StreamTooLong, => return error.InvalidDwarf, error.MissingDebugInfo, error.ReadFailed, error.OutOfMemory, => |e| return e, }; return .{ .mapped_memory = all_mapped_memory, .dwarf = dwarf, .strtab = strtab, .symtab_raw = symtab_raw, .symbols_by_name = symbols_by_name.move(), }; } /// Uses `mmap` to map the file at `path` into memory. fn mapDebugInfoFile(io: Io, path: []const u8) ![]align(std.heap.page_size_min) const u8 { const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return error.MissingDebugInfo, else => return error.ReadFailed, }; defer file.close(io); const file_len = std.math.cast( usize, file.getEndPos() catch return error.ReadFailed, ) orelse return error.ReadFailed; return posix.mmap( null, file_len, posix.PROT.READ, .{ .TYPE = .SHARED }, file.handle, 0, ) catch return error.ReadFailed; } const std = @import("std"); const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const Io = std.Io; const assert = std.debug.assert; const posix = std.posix; const macho = std.macho; const mem = std.mem; const testing = std.testing; const builtin = @import("builtin"); const MachOFile = @This();