zig/src-self-hosted/link/MachO.zig
Jakub Konka 55dc81ba2a
Hardcode runtime (libSystem) version to minimum possible
While we try to work out what the correlation between the OS and runtime
versions is, this commit hardcodes the latter to the minimum (compat)
version of 1.0.0.

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
2020-08-26 06:30:54 +02:00

382 lines
13 KiB
Zig

const MachO = @This();
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.link);
const macho = std.macho;
const codegen = @import("../codegen.zig");
const math = std.math;
const mem = std.mem;
const trace = @import("../tracy.zig").trace;
const Type = @import("../type.zig").Type;
const Module = @import("../Module.zig");
const link = @import("../link.zig");
const File = link.File;
pub const base_tag: File.Tag = File.Tag.macho;
base: File,
/// List of all load command headers that are in the file.
/// We use it to track number and size of all commands needed by the header.
commands: std.ArrayListUnmanaged(macho.load_command) = std.ArrayListUnmanaged(macho.load_command){},
command_file_offset: ?u64 = null,
/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
/// Same order as in the file.
segments: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
/// Section (headers) *always* follow segment (load commands) directly!
sections: std.ArrayListUnmanaged(macho.section_64) = std.ArrayListUnmanaged(macho.section_64){},
/// Offset (index) into __TEXT segment load command.
text_segment_offset: ?u64 = null,
/// Offset (index) into __LINKEDIT segment load command.
linkedit_segment_offset: ?u664 = null,
/// Entry point load command
entry_point_cmd: ?macho.entry_point_command = null,
entry_addr: ?u64 = null,
/// The first 4GB of process' memory is reserved for the null (__PAGEZERO) segment.
/// This is also the start address for our binary.
vm_start_address: u64 = 0x100000000,
seg_table_dirty: bool = false,
error_flags: File.ErrorFlags = File.ErrorFlags{},
/// `alloc_num / alloc_den` is the factor of padding when allocating.
const alloc_num = 4;
const alloc_den = 3;
/// Default path to dyld
/// TODO instead of hardcoding it, we should probably look through some env vars and search paths
/// instead but this will do for now.
const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
/// Default lib search path
/// TODO instead of hardcoding it, we should probably look through some env vars and search paths
/// instead but this will do for now.
const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib";
const LIB_SYSTEM_NAME: [*:0]const u8 = "System";
/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it
const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib";
pub const TextBlock = struct {
pub const empty = TextBlock{};
};
pub const SrcFn = struct {
pub const empty = SrcFn{};
};
pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*File {
assert(options.object_format == .macho);
const file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, .mode = link.determineMode(options) });
errdefer file.close();
var macho_file = try allocator.create(MachO);
errdefer allocator.destroy(macho_file);
macho_file.* = openFile(allocator, file, options) catch |err| switch (err) {
error.IncrFailed => try createFile(allocator, file, options),
else => |e| return e,
};
return &macho_file.base;
}
/// Returns error.IncrFailed if incremental update could not be performed.
fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO {
switch (options.output_mode) {
.Exe => {},
.Obj => {},
.Lib => return error.IncrFailed,
}
var self: MachO = .{
.base = .{
.file = file,
.tag = .macho,
.options = options,
.allocator = allocator,
},
};
errdefer self.deinit();
// TODO implement reading the macho file
return error.IncrFailed;
//try self.populateMissingMetadata();
//return self;
}
/// Truncates the existing file contents and overwrites the contents.
/// Returns an error if `file` is not already open with +read +write +seek abilities.
fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO {
var self: MachO = .{
.base = .{
.file = file,
.tag = .macho,
.options = options,
.allocator = allocator,
},
};
errdefer self.deinit();
switch (options.output_mode) {
.Exe => {
// The first segment command for executables is always a __PAGEZERO segment.
const pagezero = .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = commandSize(@sizeOf(macho.segment_command_64)),
.segname = makeString("__PAGEZERO"),
.vmaddr = 0,
.vmsize = self.vm_start_address,
.fileoff = 0,
.filesize = 0,
.maxprot = macho.VM_PROT_NONE,
.initprot = macho.VM_PROT_NONE,
.nsects = 0,
.flags = 0,
};
try self.commands.append(allocator, .{
.cmd = pagezero.cmd,
.cmdsize = pagezero.cmdsize,
});
try self.segments.append(allocator, pagezero);
},
.Obj => return error.TODOImplementWritingObjFiles,
.Lib => return error.TODOImplementWritingLibFiles,
}
try self.populateMissingMetadata();
return self;
}
fn writeMachOHeader(self: *MachO) !void {
var hdr: macho.mach_header_64 = undefined;
hdr.magic = macho.MH_MAGIC_64;
const CpuInfo = struct {
cpu_type: macho.cpu_type_t,
cpu_subtype: macho.cpu_subtype_t,
};
const cpu_info: CpuInfo = switch (self.base.options.target.cpu.arch) {
.aarch64 => .{
.cpu_type = macho.CPU_TYPE_ARM64,
.cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL,
},
.x86_64 => .{
.cpu_type = macho.CPU_TYPE_X86_64,
.cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL,
},
else => return error.UnsupportedMachOArchitecture,
};
hdr.cputype = cpu_info.cpu_type;
hdr.cpusubtype = cpu_info.cpu_subtype;
const filetype: u32 = switch (self.base.options.output_mode) {
.Exe => macho.MH_EXECUTE,
.Obj => macho.MH_OBJECT,
.Lib => switch (self.base.options.link_mode) {
.Static => return error.TODOStaticLibMachOType,
.Dynamic => macho.MH_DYLIB,
},
};
hdr.filetype = filetype;
const ncmds = try math.cast(u32, self.commands.items.len);
hdr.ncmds = ncmds;
var sizeof_cmds: u32 = 0;
for (self.commands.items) |cmd| {
sizeof_cmds += cmd.cmdsize;
}
hdr.sizeofcmds = sizeof_cmds;
// TODO should these be set to something else?
hdr.flags = 0;
hdr.reserved = 0;
try self.base.file.?.pwriteAll(@ptrCast([*]const u8, &hdr)[0..@sizeOf(macho.mach_header_64)], 0);
}
pub fn flush(self: *MachO, module: *Module) !void {
// Save segments first
{
const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segments.items.len);
defer self.base.allocator.free(buf);
self.command_file_offset = @sizeOf(macho.mach_header_64);
for (buf) |*seg, i| {
seg.* = self.segments.items[i];
self.command_file_offset.? += self.segments.items[i].cmdsize;
}
try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), @sizeOf(macho.mach_header_64));
}
switch (self.base.options.output_mode) {
.Exe => {
{
// Specify path to dynamic linker dyld
const cmdsize = commandSize(@sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH));
const load_dylinker = [1]macho.dylinker_command{
.{
.cmd = macho.LC_LOAD_DYLINKER,
.cmdsize = cmdsize,
.name = @sizeOf(macho.dylinker_command),
},
};
try self.commands.append(self.base.allocator, .{
.cmd = macho.LC_LOAD_DYLINKER,
.cmdsize = cmdsize,
});
try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylinker[0..1]), self.command_file_offset.?);
const file_offset = self.command_file_offset.? + @sizeOf(macho.dylinker_command);
try self.addPadding(cmdsize - @sizeOf(macho.dylinker_command), file_offset);
try self.base.file.?.pwriteAll(mem.spanZ(DEFAULT_DYLD_PATH), file_offset);
self.command_file_offset.? += cmdsize;
}
{
// Link against libSystem
const cmdsize = commandSize(@sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH));
// TODO Find a way to work out runtime version from the OS version triple stored in std.Target.
// In the meantime, we're gonna hardcode to the minimum compatibility version of 1.0.0.
const min_version = 0x10000;
const dylib = .{
.name = @sizeOf(macho.dylib_command),
.timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files
.current_version = min_version,
.compatibility_version = min_version,
};
const load_dylib = [1]macho.dylib_command{
.{
.cmd = macho.LC_LOAD_DYLIB,
.cmdsize = cmdsize,
.dylib = dylib,
},
};
try self.commands.append(self.base.allocator, .{
.cmd = macho.LC_LOAD_DYLIB,
.cmdsize = cmdsize,
});
try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylib[0..1]), self.command_file_offset.?);
const file_offset = self.command_file_offset.? + @sizeOf(macho.dylib_command);
try self.addPadding(cmdsize - @sizeOf(macho.dylib_command), file_offset);
try self.base.file.?.pwriteAll(mem.spanZ(LIB_SYSTEM_PATH), file_offset);
self.command_file_offset.? += cmdsize;
}
},
.Obj => return error.TODOImplementWritingObjFiles,
.Lib => return error.TODOImplementWritingLibFiles,
}
if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
log.debug("flushing. no_entry_point_found = true\n", .{});
self.error_flags.no_entry_point_found = true;
} else {
log.debug("flushing. no_entry_point_found = false\n", .{});
self.error_flags.no_entry_point_found = false;
try self.writeMachOHeader();
}
}
pub fn deinit(self: *MachO) void {
self.commands.deinit(self.base.allocator);
self.segments.deinit(self.base.allocator);
self.sections.deinit(self.base.allocator);
}
pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {}
pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {}
pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {}
pub fn updateDeclExports(
self: *MachO,
module: *Module,
decl: *const Module.Decl,
exports: []const *Module.Export,
) !void {}
pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
@panic("TODO implement getDeclVAddr for MachO");
}
pub fn populateMissingMetadata(self: *MachO) !void {
if (self.text_segment_offset == null) {
self.text_segment_offset = @intCast(u64, self.segments.items.len);
const file_size = alignSize(u64, self.base.options.program_code_size_hint, 0x1000);
log.debug("vmsize/filesize = {}", .{file_size});
const file_offset = 0;
const vm_address = self.vm_start_address; // the end of __PAGEZERO segment in VM
const protection = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE;
const cmdsize = commandSize(@sizeOf(macho.segment_command_64));
const text_segment = .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = cmdsize,
.segname = makeString("__TEXT"),
.vmaddr = vm_address,
.vmsize = file_size,
.fileoff = 0, // __TEXT segment *always* starts at 0 file offset
.filesize = 0, //file_size,
.maxprot = protection,
.initprot = protection,
.nsects = 0,
.flags = 0,
};
try self.commands.append(self.base.allocator, .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = cmdsize,
});
try self.segments.append(self.base.allocator, text_segment);
}
}
fn makeString(comptime bytes: []const u8) [16]u8 {
var buf = [_]u8{0} ** 16;
if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
mem.copy(u8, buf[0..], bytes);
return buf;
}
fn alignSize(comptime Int: type, min_size: anytype, alignment: Int) Int {
const size = @intCast(Int, min_size);
if (size % alignment == 0) return size;
const div = size / alignment;
return (div + 1) * alignment;
}
fn commandSize(min_size: anytype) u32 {
return alignSize(u32, min_size, @sizeOf(u64));
}
fn addPadding(self: *MachO, size: u32, file_offset: u64) !void {
if (size == 0) return;
const buf = try self.base.allocator.alloc(u8, size);
defer self.base.allocator.free(buf);
mem.set(u8, buf[0..], 0);
try self.base.file.?.pwriteAll(buf, file_offset);
}