zig/lib/std/debug/ElfFile.zig
2025-10-29 06:20:48 -07:00

537 lines
20 KiB
Zig

//! A helper type for loading an ELF file and collecting its DWARF debug information, unwind
//! information, and symbol table.
/// Whether this is a 64-bit ELF file, as reported by the ELF header.
is_64: bool,
/// The byte order of the file, as reported by the ELF header.
endian: Endian,
/// This is `null` iff any of the required DWARF sections were missing. `ElfFile.load` does *not*
/// call `Dwarf.open`, `Dwarf.scanAllFunctions`, etc; that is the caller's responsibility.
dwarf: ?Dwarf,
/// If non-`null`, describes the `.eh_frame` section, which can be used with `Dwarf.Unwind`.
eh_frame: ?UnwindSection,
/// If non-`null`, describes the `.debug_frame` section, which can be used with `Dwarf.Unwind`.
debug_frame: ?UnwindSection,
/// If non-`null`, this is the contents of the `.strtab` section.
strtab: ?[]const u8,
/// If non-`null`, describes the `.symtab` section.
symtab: ?SymtabSection,
/// Binary search table lazily populated by `searchSymtab`. It is allocated with the `gpa` passed
/// to `searchSymtab` and freed by `ElfFile.deinit`.
symbol_search_table: ?[]usize,
/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that
/// this memory can be unmapped by `ElfFile.deinit`.
mapped_file: []align(std.heap.page_size_min) const u8,
/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file`
/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must
/// be unmapped by `ElfFile.deinit`.
mapped_debug_file: ?[]align(std.heap.page_size_min) const u8,
/// State of the arena which `ElfFile.load` allocates from (for instance, the buffers holding
/// decompressed sections). `ElfFile.deinit` promotes it with its `gpa` argument and frees it.
arena: std.heap.ArenaAllocator.State,
/// Describes a section holding unwind information (`.eh_frame` or `.debug_frame`).
pub const UnwindSection = struct {
/// The address of the section, taken from the `sh_addr` field of its section header.
vaddr: u64,
/// The contents of the section (decompressed by `ElfFile.load` if the section was compressed).
bytes: []const u8,
};
/// Describes the `.symtab` section.
pub const SymtabSection = struct {
/// The size in bytes of one symbol table entry, taken from the `sh_entsize` field of the section
/// header. `searchSymtab` requires this to equal the size of `Elf32_Sym` or `Elf64_Sym`.
entry_size: u64,
/// The contents of the section, which `searchSymtab` reinterprets as an array of symbols.
bytes: []const u8,
};
/// The set of locations in which `ElfFile.load` looks for a separate debug info file.
pub const DebugInfoSearchPaths = struct {
    /// Where a debuginfod client keeps its cache; `load` uses it as a search path for build IDs.
    /// Files already present are used opportunistically, but nothing is ever added to the
    /// directory. The path is stored as two components which are concatenated on use, so that
    /// constructing the search paths never needs to allocate.
    debuginfod_client: ?[2][]const u8,
    /// Every "global debug directory" on the system, searched for both build IDs and debug
    /// links. On typical systems the only entry is "/usr/lib/debug".
    global_debug: []const []const u8,
    /// The directory containing the ELF file, searched for debug links.
    exe_dir: ?[]const u8,

    /// Search nowhere: separate debug info files are never looked for.
    pub const none: DebugInfoSearchPaths = .{
        .exe_dir = null,
        .global_debug = &.{},
        .debuginfod_client = null,
    };

    /// Returns the default search paths for the ELF file at `exe_path`.
    ///
    /// The debuginfod cache is the first of `$DEBUGINFOD_CACHE_PATH`,
    /// `$XDG_CACHE_HOME/debuginfod_client`, and `$HOME/.cache/debuginfod_client` whose
    /// environment variable is set, or `null` if none is. The returned slices reference the
    /// environment and `exe_path`; nothing is allocated.
    pub fn native(exe_path: []const u8) DebugInfoSearchPaths {
        const debuginfod_cache: ?[2][]const u8 =
            if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |explicit_dir|
                .{ explicit_dir, "" }
            else if (std.posix.getenv("XDG_CACHE_HOME")) |xdg_cache_dir|
                .{ xdg_cache_dir, "/debuginfod_client" }
            else if (std.posix.getenv("HOME")) |home_dir|
                .{ home_dir, "/.cache/debuginfod_client" }
            else
                null;

        // A path with no directory component refers to the current directory.
        const containing_dir = std.fs.path.dirname(exe_path) orelse ".";

        return .{
            .debuginfod_client = debuginfod_cache,
            .global_debug = &.{"/usr/lib/debug"},
            .exe_dir = containing_dir,
        };
    }
};
/// Releases everything owned by `ef`: the DWARF state, the symbol search table (if it was ever
/// built), the arena, and the file mappings. `gpa` is used to free the search table and to
/// promote the arena whose state is stored in `ef.arena`. `ef` is left undefined.
pub fn deinit(ef: *ElfFile, gpa: Allocator) void {
    if (ef.dwarf) |*loaded_dwarf| {
        loaded_dwarf.deinit(gpa);
    }
    if (ef.symbol_search_table) |search_table| {
        gpa.free(search_table);
    }

    var promoted_arena = ef.arena.promote(gpa);
    promoted_arena.deinit();

    std.posix.munmap(ef.mapped_file);
    if (ef.mapped_debug_file) |debug_mapping| {
        std.posix.munmap(debug_mapping);
    }

    ef.* = undefined;
}
/// The errors which `ElfFile.load` can return.
pub const LoadError = error{
OutOfMemory,
/// A size or offset read from the file could not be represented in the required integer type.
Overflow,
/// The ELF header, a section header, or a section's contents extends past the end of the file.
TruncatedElfFile,
/// A section flagged as compressed has a malformed compression header, or its data does not
/// decompress to exactly the size the header declares.
InvalidCompressedSection,
InvalidElfMagic,
InvalidElfVersion,
InvalidElfClass,
InvalidElfEndian,
// The remaining errors all occur when attempting to stat or mmap a file.
SystemResources,
MemoryMappingNotSupported,
AccessDenied,
LockedMemoryLimitExceeded,
ProcessFdQuotaExceeded,
SystemFdQuotaExceeded,
Canceled,
Unexpected,
};
/// Memory-maps `elf_file` and collects its DWARF sections, unwind sections, and symbol table.
///
/// If the file itself lacks any of `.debug_info`, `.debug_abbrev`, `.debug_str`, or `.debug_line`,
/// a separate debug info file is searched for using `di_search_paths`:
/// * by build ID, if `opt_build_id` is non-`null` and at least 3 bytes long, in each global debug
///   directory and then in the debuginfod client directory;
/// * by the `.gnu_debuglink` section, if present and `di_search_paths.exe_dir` is non-`null`, in
///   the exe directory, its `.debug` subdirectory, and each global debug directory.
/// The first candidate which `loadSeparateDebugFile` accepts is used.
///
/// This function does not close `elf_file`. On success, the result owns the file mappings and an
/// arena backed by `gpa`; release them with `ElfFile.deinit`.
pub fn load(
gpa: Allocator,
elf_file: std.fs.File,
opt_build_id: ?[]const u8,
di_search_paths: *const DebugInfoSearchPaths,
) LoadError!ElfFile {
// Everything `load` allocates (decompressed sections, candidate paths) lives in this arena, whose
// state is stored in the returned `ElfFile`.
var arena_instance: std.heap.ArenaAllocator = .init(gpa);
errdefer arena_instance.deinit();
const arena = arena_instance.allocator();
var result = loadInner(arena, elf_file, null) catch |err| switch (err) {
error.CrcMismatch => unreachable, // we passed crc as null
else => |e| return e,
};
errdefer std.posix.munmap(result.mapped_mem);
// `loadInner` did most of the work, but we might need to load an external debug info file
const di_mapped_mem: ?[]align(std.heap.page_size_min) const u8 = load_di: {
if (result.sections.get(.debug_info) != null and
result.sections.get(.debug_abbrev) != null and
result.sections.get(.debug_str) != null and
result.sections.get(.debug_line) != null)
{
// The info is already loaded from this file alone!
break :load_di null;
}
// We're missing some debug info---let's try and load it from a separate file.
build_id: {
const build_id = opt_build_id orelse break :build_id;
if (build_id.len < 3) break :build_id;
// The first byte of the build ID names a subdirectory; the remaining bytes name the file.
for (di_search_paths.global_debug) |global_debug| {
if (try loadSeparateDebugFile(arena, &result, null, "{s}/.build-id/{x}/{x}.debug", .{
global_debug,
build_id[0..1],
build_id[1..],
})) |mapped| break :load_di mapped;
}
if (di_search_paths.debuginfod_client) |components| {
if (try loadSeparateDebugFile(arena, &result, null, "{s}{s}/{x}/debuginfo", .{
components[0],
components[1],
build_id,
})) |mapped| break :load_di mapped;
}
}
debug_link: {
const section = result.sections.get(.gnu_debuglink) orelse break :debug_link;
// The section is read as a NUL-terminated file name, followed (at the next 4-byte aligned
// offset) by a 32-bit CRC which the debug file's contents must match.
const debug_filename = std.mem.sliceTo(section.bytes, 0);
const crc_offset = std.mem.alignForward(usize, debug_filename.len + 1, 4);
if (section.bytes.len < crc_offset + 4) break :debug_link;
const debug_crc = std.mem.readInt(u32, section.bytes[crc_offset..][0..4], result.endian);
const exe_dir = di_search_paths.exe_dir orelse break :debug_link;
if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}", .{
exe_dir,
debug_filename,
})) |mapped| break :load_di mapped;
if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/.debug/{s}", .{
exe_dir,
debug_filename,
})) |mapped| break :load_di mapped;
for (di_search_paths.global_debug) |global_debug| {
// This looks like a bug; it isn't. They really do embed the absolute path to the
// exe's dirname, *under* the global debug path.
if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}/{s}", .{
global_debug,
exe_dir,
debug_filename,
})) |mapped| break :load_di mapped;
}
}
break :load_di null;
};
// Nothing below may fail: ownership of the arena and both mappings moves into the return value,
// so no `errdefer` above must run from this point on.
errdefer comptime unreachable;
return .{
.is_64 = result.is_64,
.endian = result.endian,
.dwarf = dwarf: {
if (result.sections.get(.debug_info) == null or
result.sections.get(.debug_abbrev) == null or
result.sections.get(.debug_str) == null or
result.sections.get(.debug_line) == null)
{
break :dwarf null; // debug info not present
}
// Copy over every section which `Dwarf` knows about. The data is marked as not owned because
// it lives in the mappings or the arena, both of which `ElfFile.deinit` releases.
var sections: Dwarf.SectionArray = @splat(null);
inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| {
if (result.sections.get(@field(Section.Id, f.name))) |s| {
sections[f.value] = .{ .data = s.bytes, .owned = false };
}
}
break :dwarf .{ .sections = sections };
},
.eh_frame = if (result.sections.get(.eh_frame)) |s| .{
.vaddr = s.header.sh_addr,
.bytes = s.bytes,
} else null,
.debug_frame = if (result.sections.get(.debug_frame)) |s| .{
.vaddr = s.header.sh_addr,
.bytes = s.bytes,
} else null,
.strtab = if (result.sections.get(.strtab)) |s| s.bytes else null,
.symtab = if (result.sections.get(.symtab)) |s| .{
.entry_size = s.header.sh_entsize,
.bytes = s.bytes,
} else null,
.symbol_search_table = null,
.mapped_file = result.mapped_mem,
.mapped_debug_file = di_mapped_mem,
.arena = arena_instance.state,
};
}
/// Finds the symbol in `.symtab` whose address range `[st_value, st_value + st_size)` contains
/// `vaddr`, and returns it with its name read from `.strtab`. Returns `.unknown` if no symbol
/// in the search table contains `vaddr`.
///
/// The first call builds a sorted search table using `gpa` and caches it in
/// `ef.symbol_search_table`; later calls reuse it. The returned name is a slice of `ef.strtab`.
///
/// Errors:
/// * `NoSymtab`, `NoStrtab`: the corresponding section was not loaded.
/// * `BadSymtab`: the symbol table's entry size is zero, does not divide the section size, or
///   does not match the symbol type for this ELF class; or the matching symbol's name offset
///   lies outside `.strtab`.
/// * `OutOfMemory`: the search table could not be allocated.
pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{
    NoSymtab,
    NoStrtab,
    BadSymtab,
    OutOfMemory,
}!std.debug.Symbol {
    const symtab = ef.symtab orelse return error.NoSymtab;
    const strtab = ef.strtab orelse return error.NoStrtab;

    // `entry_size` is copied straight from the section header, so a malformed file can make it
    // zero. Reject that before it is used as a divisor.
    if (symtab.entry_size == 0) return error.BadSymtab;
    if (symtab.bytes.len % symtab.entry_size != 0) return error.BadSymtab;

    const swap_endian = ef.endian != @import("builtin").cpu.arch.endian();

    switch (ef.is_64) {
        inline true, false => |is_64| {
            const Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym;
            if (symtab.entry_size != @sizeOf(Sym)) return error.BadSymtab;
            const symbols: []align(1) const Sym = @ptrCast(symtab.bytes);

            if (ef.symbol_search_table == null) {
                ef.symbol_search_table = try buildSymbolSearchTable(gpa, ef.endian, Sym, symbols);
            }
            const search_table = ef.symbol_search_table.?;

            const SearchContext = struct {
                swap_endian: bool,
                target: u64,
                symbols: []align(1) const Sym,
                fn predicate(ctx: @This(), sym_index: usize) bool {
                    // We need to return `true` for the first N items, then `false` for the rest --
                    // the index we'll get out is the first `false` one. So, we'll return `true` iff
                    // the target address is after the *end* of this symbol. This synchronizes with
                    // the logic in `buildSymbolSearchTable` which sorts by *end* address.
                    var sym = ctx.symbols[sym_index];
                    if (ctx.swap_endian) std.mem.byteSwapAllFields(Sym, &sym);
                    const sym_end = sym.st_value + sym.st_size;
                    return ctx.target >= sym_end;
                }
            };
            const sym_index_index = std.sort.partitionPoint(usize, search_table, @as(SearchContext, .{
                .swap_endian = swap_endian,
                .target = vaddr,
                .symbols = symbols,
            }), SearchContext.predicate);
            if (sym_index_index == search_table.len) return .unknown;

            // This is the first symbol ending after `vaddr`; it only matches if it also starts at
            // or before `vaddr`.
            var sym = symbols[search_table[sym_index_index]];
            if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym);
            if (vaddr < sym.st_value or vaddr >= sym.st_value + sym.st_size) return .unknown;

            // `st_name` is an offset read from the file; make sure it lies within `.strtab`
            // before slicing with it.
            if (sym.st_name > strtab.len) return error.BadSymtab;
            return .{
                .name = std.mem.sliceTo(strtab[sym.st_name..], 0),
                .compile_unit_name = null,
                .source_location = null,
            };
        },
    }
}
/// Builds the table which `searchSymtab` binary-searches: the indices into `symbols` of every
/// symbol which has a name (`st_name != 0`) and is defined (`st_shndx != SHN_UNDEF`), ordered by
/// the symbol's *end* address (`st_value + st_size`).
///
/// `endian` is the byte order of the data in `symbols`; fields are byte-swapped as needed before
/// being inspected. The returned slice is allocated with `gpa` and owned by the caller.
fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, symbols: []align(1) const Sym) error{
    OutOfMemory,
    BadSymtab,
}![]usize {
    const needs_swap = endian != @import("builtin").cpu.arch.endian();

    const ByEndAddress = struct {
        needs_swap: bool,
        all_symbols: []align(1) const Sym,

        /// Loads the symbol at `index` in native byte order.
        fn load(ctx: @This(), index: usize) Sym {
            var native = ctx.all_symbols[index];
            if (ctx.needs_swap) std.mem.byteSwapAllFields(Sym, &native);
            return native;
        }

        /// Orders symbol indices by *end* address rather than start address, which is what the
        /// partition predicate in `searchSymtab` relies on.
        fn lessThan(ctx: @This(), lhs_index: usize, rhs_index: usize) bool {
            const lhs = ctx.load(lhs_index);
            const rhs = ctx.load(rhs_index);
            const lhs_end = lhs.st_value + lhs.st_size;
            const rhs_end = rhs.st_value + rhs.st_size;
            return lhs_end < rhs_end;
        }
    };
    const order: ByEndAddress = .{
        .needs_swap = needs_swap,
        .all_symbols = symbols,
    };

    var indices: std.ArrayList(usize) = .empty;
    errdefer indices.deinit(gpa);

    for (0..symbols.len) |index| {
        const candidate = order.load(index);
        const is_named = candidate.st_name != 0;
        const is_defined = candidate.st_shndx != elf.SHN_UNDEF;
        if (is_named and is_defined) {
            try indices.append(gpa, index);
        }
    }

    std.mem.sort(usize, indices.items, order, ByEndAddress.lessThan);

    return indices.toOwnedSlice(gpa);
}
/// A section collected by `loadInner`. Only used locally, during `load`.
const Section = struct {
/// The section header, as yielded by the section header iterator in `loadInner`.
header: elf.Elf64_Shdr,
/// The section's contents: a slice of the mapped file, or, if the section was flagged
/// `SHF_COMPRESSED`, an arena-allocated buffer holding the decompressed data.
bytes: []const u8,
/// The sections which `loadInner` looks for. A section in the file corresponds to a tag when
/// its name equals "." followed by the tag's name.
const Id = enum {
// DWARF sections: see `Dwarf.Section.Id`.
// `load` and `loadSeparateDebugFile` look these up using the field names of
// `Dwarf.Section.Id`, so each of those names needs a tag of the same name here.
debug_info,
debug_abbrev,
debug_str,
debug_str_offsets,
debug_line,
debug_line_str,
debug_ranges,
debug_loclists,
debug_rnglists,
debug_addr,
debug_names,
// Then anything else we're interested in.
gnu_debuglink,
eh_frame,
debug_frame,
symtab,
strtab,
};
/// Maps every `Id` to the section that was found for it, or `null` if none was.
const Array = std.enums.EnumArray(Section.Id, ?Section);
};
/// Tries to use the file at the path produced by formatting `fmt` with `args` as the separate
/// debug info file for `main_loaded`.
///
/// The candidate is accepted only if it can be opened and parsed, matches `opt_crc` (when given),
/// has the same ELF class and byte order as `main_loaded`, and supplies every required DWARF
/// section (`.debug_info`, `.debug_abbrev`, `.debug_str`, `.debug_line`) which `main_loaded` is
/// missing. On acceptance, each DWARF section missing from `main_loaded` is filled in from the
/// candidate, and the candidate's mapping is returned; the caller becomes responsible for
/// unmapping it. Otherwise `null` is returned and nothing stays mapped.
///
/// The path and any decompressed section data are allocated in `arena`. The only error is
/// `OutOfMemory`; every other failure just means this candidate is unusable.
fn loadSeparateDebugFile(arena: Allocator, main_loaded: *LoadInnerResult, opt_crc: ?u32, comptime fmt: []const u8, args: anytype) Allocator.Error!?[]align(std.heap.page_size_min) const u8 {
    const path = try std.fmt.allocPrint(arena, fmt, args);
    const elf_file = std.fs.cwd().openFile(path, .{}) catch return null;
    defer elf_file.close();

    const result = loadInner(arena, elf_file, opt_crc) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        error.CrcMismatch => return null,
        else => return null,
    };
    // The candidate is now mapped; every path below must either unmap it or hand it to the caller.
    errdefer comptime unreachable;

    // After merging, all required DWARF sections must be available: a section is a problem only
    // if *neither* file has it. (Bailing out when the candidate *does* have a section the main
    // file lacks would reject exactly the files we are looking for.)
    const have_debug_sections = inline for (@as([]const []const u8, &.{
        "debug_info",
        "debug_abbrev",
        "debug_str",
        "debug_line",
    })) |name| {
        const s = @field(Section.Id, name);
        if (main_loaded.sections.get(s) == null and result.sections.get(s) == null) {
            break false;
        }
    } else true;

    if (result.is_64 != main_loaded.is_64 or
        result.endian != main_loaded.endian or
        !have_debug_sections)
    {
        std.posix.munmap(result.mapped_mem);
        return null;
    }

    // Sections which the main file already has take precedence; only the gaps are filled in.
    inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| {
        const id = @field(Section.Id, f.name);
        if (main_loaded.sections.get(id) == null) {
            main_loaded.sections.set(id, result.sections.get(id));
        }
    }

    return result.mapped_mem;
}
/// What `loadInner` extracts from a single ELF file.
const LoadInnerResult = struct {
/// Whether the file is a 64-bit ELF, as reported by its header.
is_64: bool,
/// The byte order reported by the file's header.
endian: Endian,
/// The sections of interest which were found; entries are `null` for sections not found.
sections: Section.Array,
/// The mapping of the whole file. The receiver of this result is responsible for unmapping it.
mapped_mem: []align(std.heap.page_size_min) const u8,
};
/// Memory-maps `elf_file`, validates its ELF header, and collects every section named in
/// `Section.Id`.
///
/// * If `opt_crc` is given, the CRC-32 of the whole file must equal it, else `error.CrcMismatch`.
/// * Sections of type `SHT_NULL` or `SHT_NOBITS` are skipped, and only the first section with a
///   given name is used.
/// * Sections flagged `SHF_COMPRESSED` are decompressed into a buffer allocated from `arena`.
///   Only zlib is supported; sections using another algorithm are skipped rather than rejected.
///
/// On success, the caller owns `mapped_mem` in the result and must unmap it. On any error,
/// including `CrcMismatch`, the mapping is released before returning.
fn loadInner(
    arena: Allocator,
    elf_file: std.fs.File,
    opt_crc: ?u32,
) (LoadError || error{ CrcMismatch, Canceled })!LoadInnerResult {
    const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: {
        const file_len = std.math.cast(
            usize,
            elf_file.getEndPos() catch |err| switch (err) {
                error.PermissionDenied => unreachable, // not asking for PROT_EXEC
                else => |e| return e,
            },
        ) orelse return error.Overflow;

        break :mapped std.posix.mmap(
            null,
            file_len,
            std.posix.PROT.READ,
            .{ .TYPE = .SHARED },
            elf_file.handle,
            0,
        ) catch |err| switch (err) {
            error.MappingAlreadyExists => unreachable, // not using FIXED_NOREPLACE
            error.PermissionDenied => unreachable, // not asking for PROT_EXEC
            else => |e| return e,
        };
    };
    // This must be registered before anything else can fail, so that *every* error return below
    // (the CRC check included) releases the mapping.
    errdefer std.posix.munmap(mapped_mem);

    if (opt_crc) |crc| {
        if (std.hash.crc.Crc32.hash(mapped_mem) != crc) {
            return error.CrcMismatch;
        }
    }

    var fr: std.Io.Reader = .fixed(mapped_mem);

    const header = elf.Header.read(&fr) catch |err| switch (err) {
        error.ReadFailed => unreachable,
        error.EndOfStream => return error.TruncatedElfFile,

        error.InvalidElfMagic,
        error.InvalidElfVersion,
        error.InvalidElfClass,
        error.InvalidElfEndian,
        => |e| return e,
    };
    const endian = header.endian;

    // Locate the section header of the section name string table.
    const shstrtab_shdr_off = try std.math.add(
        u64,
        header.shoff,
        try std.math.mul(u64, header.shstrndx, header.shentsize),
    );
    const shstrtab_shdr_seek = std.math.cast(usize, shstrtab_shdr_off) orelse return error.Overflow;
    // Never move the reader's position past the end of its buffer; a header located there cannot
    // be read anyway.
    if (shstrtab_shdr_seek > mapped_mem.len) return error.TruncatedElfFile;
    fr.seek = shstrtab_shdr_seek;
    const shstrtab: []const u8 = if (header.is_64) shstrtab: {
        const shdr = fr.takeStruct(elf.Elf64_Shdr, endian) catch return error.TruncatedElfFile;
        break :shstrtab try fileRange(mapped_mem, shdr.sh_offset, shdr.sh_size);
    } else shstrtab: {
        const shdr = fr.takeStruct(elf.Elf32_Shdr, endian) catch return error.TruncatedElfFile;
        break :shstrtab try fileRange(mapped_mem, shdr.sh_offset, shdr.sh_size);
    };

    var sections: Section.Array = .initFill(null);

    var it = header.iterateSectionHeadersBuffer(mapped_mem);
    while (it.next() catch return error.TruncatedElfFile) |shdr| {
        if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
        if (shdr.sh_name > shstrtab.len) return error.TruncatedElfFile;
        const name = std.mem.sliceTo(shstrtab[@intCast(shdr.sh_name)..], 0);

        // Map the section's name to one of the sections we care about; ignore all others.
        const section_id: Section.Id = inline for (@typeInfo(Section.Id).@"enum".fields) |s| {
            if (std.mem.eql(u8, "." ++ s.name, name)) {
                break @enumFromInt(s.value);
            }
        } else continue;

        // If a name occurs more than once, the first occurrence wins.
        if (sections.get(section_id) != null) continue;

        const raw_section_bytes = try fileRange(mapped_mem, shdr.sh_offset, shdr.sh_size);
        const section_bytes: []const u8 = bytes: {
            if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) break :bytes raw_section_bytes;

            var section_reader: std.Io.Reader = .fixed(raw_section_bytes);
            const ch_type: elf.COMPRESS, const ch_size: u64 = if (header.is_64) ch: {
                const chdr = section_reader.takeStruct(elf.Elf64_Chdr, endian) catch return error.InvalidCompressedSection;
                break :ch .{ chdr.ch_type, chdr.ch_size };
            } else ch: {
                const chdr = section_reader.takeStruct(elf.Elf32_Chdr, endian) catch return error.InvalidCompressedSection;
                break :ch .{ chdr.ch_type, chdr.ch_size };
            };
            if (ch_type != .ZLIB) {
                // The compression algorithm is unsupported, but don't make that a hard error; the
                // file might still be valid, and we might still be okay without this section.
                continue;
            }

            // `ch_size` is the declared size of the decompressed data; decompress into a buffer
            // of exactly that size.
            const buf = try arena.alloc(u8, std.math.cast(usize, ch_size) orelse return error.Overflow);
            var fw: std.Io.Writer = .fixed(buf);
            var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{});
            const n = decompress.reader.streamRemaining(&fw) catch |err| switch (err) {
                // If a write failed, then `buf` filled up, so `ch_size` was incorrect
                error.WriteFailed => return error.InvalidCompressedSection,
                // If a read failed, flate expected the section to have more data
                error.ReadFailed => return error.InvalidCompressedSection,
            };
            // It's also an error if the data is shorter than expected.
            if (n != buf.len) return error.InvalidCompressedSection;
            break :bytes buf;
        };
        sections.set(section_id, .{ .header = shdr, .bytes = section_bytes });
    }

    return .{
        .is_64 = header.is_64,
        .endian = endian,
        .sections = sections,
        .mapped_mem = mapped_mem,
    };
}

/// Returns `mapped_mem[offset..][0..size]`, or `error.TruncatedElfFile` if that range does not
/// lie entirely within `mapped_mem`. The end of the range is computed with a checked addition so
/// that a corrupt `offset`/`size` pair cannot overflow.
fn fileRange(mapped_mem: []const u8, offset: u64, size: u64) error{TruncatedElfFile}![]const u8 {
    const end = std.math.add(u64, offset, size) catch return error.TruncatedElfFile;
    if (end > mapped_mem.len) return error.TruncatedElfFile;
    return mapped_mem[@intCast(offset)..][0..@intCast(size)];
}
const std = @import("std");
const Endian = std.builtin.Endian;
const Dwarf = std.debug.Dwarf;
const ElfFile = @This();
const Allocator = std.mem.Allocator;
const elf = std.elf;