wasm: Use File.Index for symbol locations

Rather than using an optional `u16`, we now directly use `File.Index`,
which can already represent an unknown file through its `.null` value.
This means we no longer pay the extra memory cost of the optional.

This type of index is now used for:
- SymbolLoc
- Key of the functions map
- InitFunc

Now we can simply pass things like `atom.file`, `object.file`, `loc.file`,
etc. whenever we need to access the corresponding object file, which makes
the code a lot simpler.
This commit is contained in:
Luuk de Gram 2024-01-21 12:06:33 +01:00
parent 94f3a18c88
commit 0a030d6598
No known key found for this signature in database
GPG key ID: A8CFE58E4DC7D664
4 changed files with 49 additions and 84 deletions

View file

@ -125,7 +125,10 @@ func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{},
/// Output function section where the key is the original
/// function index and the value is function.
/// This allows us to map multiple symbols to the same function.
functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, struct { func: std.wasm.Func, sym_index: u32 }) = .{},
functions: std.AutoArrayHashMapUnmanaged(
struct { file: File.Index, index: u32 },
struct { func: std.wasm.Func, sym_index: u32 },
) = .{},
/// Output global section
wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{},
/// Memory section
@ -217,16 +220,14 @@ pub const SymbolLoc = struct {
/// The index of the symbol within the specified file
index: u32,
/// The index of the object file where the symbol resides.
/// When this is `null` the symbol comes from a non-object file.
file: ?u16,
file: File.Index,
/// From a given location, returns the corresponding symbol in the wasm binary
pub fn getSymbol(loc: SymbolLoc, wasm_file: *const Wasm) *Symbol {
if (wasm_file.discarded.get(loc)) |new_loc| {
return new_loc.getSymbol(wasm_file);
}
if (loc.file) |object_index| {
const obj_file = wasm_file.file(@enumFromInt(object_index)).?;
if (wasm_file.file(loc.file)) |obj_file| {
return obj_file.symbol(loc.index);
}
return &wasm_file.synthetic_symbols.items[loc.index];
@ -237,8 +238,7 @@ pub const SymbolLoc = struct {
if (wasm_file.discarded.get(loc)) |new_loc| {
return new_loc.getName(wasm_file);
}
if (loc.file) |object_index| {
const obj_file = wasm_file.file(@enumFromInt(object_index)).?;
if (wasm_file.file(loc.file)) |obj_file| {
return obj_file.symbolName(loc.index);
}
return wasm_file.string_table.get(wasm_file.synthetic_symbols.items[loc.index].name);
@ -263,7 +263,7 @@ pub const InitFuncLoc = struct {
/// object file index in the list of objects.
/// Unlike `SymbolLoc` this cannot be `null` as we never define
/// our own ctors.
file: u16,
file: File.Index,
/// Symbol index within the corresponding object file.
index: u32,
/// The priority in which the constructor must be called.
@ -633,7 +633,7 @@ fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !Symbol
fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) !SymbolLoc {
const sym_index = @as(u32, @intCast(wasm.synthetic_symbols.items.len));
const loc: SymbolLoc = .{ .index = sym_index, .file = null };
const loc: SymbolLoc = .{ .index = sym_index, .file = .null };
const gpa = wasm.base.comp.gpa;
try wasm.synthetic_symbols.append(gpa, .{
.name = name_offset,
@ -680,7 +680,7 @@ pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Ind
const index: Atom.Index = @intCast(wasm.managed_atoms.items.len);
const atom = try wasm.managed_atoms.addOne(gpa);
atom.* = .{ .file = file_index, .sym_index = sym_index };
try wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = sym_index }, index);
try wasm.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), index);
return index;
}
@ -763,10 +763,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
for (obj_file.symbols(), 0..) |symbol, i| {
const sym_index: u32 = @intCast(i);
const location: SymbolLoc = .{
.file = @intFromEnum(file_index),
.index = sym_index,
};
const location: SymbolLoc = .{ .file = file_index, .index = sym_index };
const sym_name = obj_file.string(symbol.name);
if (mem.eql(u8, sym_name, "__indirect_function_table")) {
continue;
@ -796,9 +793,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
const existing_loc = maybe_existing.value_ptr.*;
const existing_sym: *Symbol = existing_loc.getSymbol(wasm);
const existing_file = wasm.file(existing_loc.file);
const existing_file_path = if (existing_loc.file) |existing_file_index|
wasm.file(@enumFromInt(existing_file_index)).?.path()
const existing_file_path = if (existing_file) |existing_obj_file|
existing_obj_file.path()
else
wasm.name;
@ -831,8 +829,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
if (existing_sym.isUndefined() and symbol.isUndefined()) {
// only verify module/import name for function symbols
if (symbol.tag == .function) {
const existing_name = if (existing_loc.file) |existing_file_index| blk: {
const existing_obj = wasm.file(@enumFromInt(existing_file_index)).?;
const existing_name = if (existing_file) |existing_obj| blk: {
const imp = existing_obj.import(existing_loc.index);
break :blk existing_obj.string(imp.module_name);
} else blk: {
@ -1363,8 +1360,8 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void {
const symbol = undef.getSymbol(wasm);
if (symbol.tag == .data) {
found_undefined_symbols = true;
const file_name = if (undef.file) |file_index|
wasm.file(@enumFromInt(file_index)).?.path()
const file_name = if (wasm.file(undef.file)) |obj_file|
obj_file.path()
else
wasm.name;
const symbol_name = undef.getName(wasm);
@ -1461,8 +1458,7 @@ fn getGlobalType(wasm: *const Wasm, loc: SymbolLoc) std.wasm.GlobalType {
const symbol = loc.getSymbol(wasm);
assert(symbol.tag == .global);
const is_undefined = symbol.isUndefined();
if (loc.file) |file_index| {
const obj_file = wasm.file(@enumFromInt(file_index)).?;
if (wasm.file(loc.file)) |obj_file| {
if (is_undefined) {
return obj_file.import(loc.index).kind.global;
}
@ -1480,8 +1476,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type {
const symbol = loc.getSymbol(wasm);
assert(symbol.tag == .function);
const is_undefined = symbol.isUndefined();
if (loc.file) |file_index| {
const obj_file = wasm.file(@enumFromInt(file_index)).?;
if (wasm.file(loc.file)) |obj_file| {
if (is_undefined) {
const ty_index = obj_file.import(loc.index).kind.function;
return obj_file.funcTypes()[ty_index];
@ -1625,8 +1620,8 @@ fn allocateAtoms(wasm: *Wasm) !void {
// Ensure we get the original symbol, so we verify the correct symbol on whether
// it is dead or not and ensure an atom is removed when dead.
// This is required as we may have parsed aliases into atoms.
const sym = if (symbol_loc.file) |file_index|
wasm.file(@enumFromInt(file_index)).?.symbol(symbol_loc.index).*
const sym = if (wasm.file(symbol_loc.file)) |obj_file|
obj_file.symbol(symbol_loc.index).*
else
wasm.synthetic_symbols.items[symbol_loc.index];
@ -1754,10 +1749,10 @@ fn setupInitFunctions(wasm: *Wasm) !void {
log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)});
wasm.init_funcs.appendAssumeCapacity(.{
.index = init_func.symbol_index,
.file = @intFromEnum(file_index),
.file = file_index,
.priority = init_func.priority,
});
try wasm.mark(.{ .index = init_func.symbol_index, .file = @intFromEnum(file_index) });
try wasm.mark(.{ .index = init_func.symbol_index, .file = file_index });
}
}
@ -1841,7 +1836,7 @@ fn createSyntheticFunction(
const func_index = wasm.imported_functions_count + @as(u32, @intCast(wasm.functions.count()));
try wasm.functions.putNoClobber(
gpa,
.{ .file = null, .index = func_index },
.{ .file = .null, .index = func_index },
.{ .func = .{ .type_index = ty_index }, .sym_index = loc.index },
);
symbol.index = func_index;
@ -1849,8 +1844,8 @@ fn createSyntheticFunction(
// create the atom that will be output into the final binary
const atom_index = try wasm.createAtom(loc.index, .null);
const atom = wasm.getAtomPtr(atom_index);
atom.code = function_body.moveToUnmanaged();
atom.size = @intCast(function_body.items.len);
atom.code = function_body.moveToUnmanaged();
try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index);
}
@ -1969,20 +1964,8 @@ fn initializeTLSFunction(wasm: *Wasm) !void {
fn setupImports(wasm: *Wasm) !void {
const gpa = wasm.base.comp.gpa;
log.debug("Merging imports", .{});
var discarded_it = wasm.discarded.keyIterator();
while (discarded_it.next()) |discarded| {
if (discarded.file == null) {
// remove an import if it was resolved
if (wasm.imports.remove(discarded.*)) {
log.debug("Removed symbol '{s}' as an import", .{
discarded.getName(wasm),
});
}
}
}
for (wasm.resolved_symbols.keys()) |symbol_loc| {
const file_index = symbol_loc.file orelse {
const obj_file = wasm.file(symbol_loc.file) orelse {
// Synthetic symbols will already exist in the `import` section
continue;
};
@ -1996,7 +1979,6 @@ fn setupImports(wasm: *Wasm) !void {
}
log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)});
const obj_file = wasm.file(@enumFromInt(file_index)).?;
const import = obj_file.import(symbol_loc.index);
// We copy the import to a new import to ensure the names contain references
@ -2054,15 +2036,13 @@ fn mergeSections(wasm: *Wasm) !void {
defer removed_duplicates.deinit();
for (wasm.resolved_symbols.keys()) |sym_loc| {
const file_index = sym_loc.file orelse {
const obj_file = wasm.file(sym_loc.file) orelse {
// Zig code-generated symbols are already within the sections and do not
// require to be merged
continue;
};
const obj_file = wasm.file(@enumFromInt(file_index)).?;
const symbol = obj_file.symbol(sym_loc.index);
if (symbol.isDead() or symbol.isUndefined()) {
// Skip undefined symbols as they go in the `import` section
continue;
@ -2105,7 +2085,7 @@ fn mergeSections(wasm: *Wasm) !void {
symbol.index = @as(u32, @intCast(wasm.tables.items.len)) + wasm.imported_tables_count;
try wasm.tables.append(gpa, original_table);
},
else => continue,
else => {},
}
}
@ -2132,12 +2112,11 @@ fn mergeTypes(wasm: *Wasm) !void {
defer dirty.deinit();
for (wasm.resolved_symbols.keys()) |sym_loc| {
const file_index = sym_loc.file orelse {
const obj_file = wasm.file(sym_loc.file) orelse {
// zig code-generated symbols are already present in final type section
continue;
};
const obj_file = wasm.file(@enumFromInt(file_index)).?;
const symbol = obj_file.symbol(sym_loc.index);
if (symbol.tag != .function or symbol.isDead()) {
// Only functions have types. Only retrieve the type of referenced functions.
@ -2191,7 +2170,7 @@ fn setupExports(wasm: *Wasm) !void {
const sym_name = sym_loc.getName(wasm);
const export_name = if (wasm.export_names.get(sym_loc)) |name| name else blk: {
if (sym_loc.file == null) break :blk symbol.name;
if (sym_loc.file == .null) break :blk symbol.name;
break :blk try wasm.string_table.put(gpa, sym_name);
};
const exp: types.Export = if (symbol.tag == .data) exp: {
@ -2425,7 +2404,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32
break :blk index;
},
.section => {
const section_name = obj_file.symbolName(symbol.index);
const section_name = obj_file.symbolName(symbol_index);
if (mem.eql(u8, section_name, ".debug_info")) {
return wasm.debug_info_index orelse blk: {
wasm.debug_info_index = index;
@ -2475,7 +2454,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32
break :blk index;
};
} else {
log.warn("found unknown section '{s}'", .{section_name});
log.err("found unknown section '{s}'", .{section_name});
return error.UnexpectedValue;
}
},
@ -4221,10 +4200,7 @@ fn emitDataRelocations(
size_offset += getULEB128Size(atom.size);
for (atom.relocs.items) |relocation| {
count += 1;
const sym_loc: SymbolLoc = .{
.file = atom.file,
.index = relocation.index,
};
const sym_loc: SymbolLoc = .{ .file = atom.file, .index = relocation.index };
const symbol_index = symbol_table.get(sym_loc).?;
try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type));
const offset = atom.offset + relocation.offset + size_offset;
@ -4322,8 +4298,7 @@ fn markReferences(wasm: *Wasm) !void {
// Debug sections may require to be parsed and marked when it contains
// relocations to alive symbols.
if (sym.tag == .section and comp.config.debug_format != .strip) {
const file_index = sym_loc.file orelse continue; // Incremental debug info is done independently
const obj_file = wasm.file(@enumFromInt(file_index)).?;
const obj_file = wasm.file(sym_loc.file) orelse continue; // Incremental debug info is done independently
_ = try obj_file.parseSymbolIntoAtom(wasm, sym_loc.index);
sym.mark();
}
@ -4347,10 +4322,10 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void {
return;
}
const atom_index = if (loc.file) |file_index| idx: {
const obj_file = wasm.file(@enumFromInt(file_index)).?;
break :idx try obj_file.parseSymbolIntoAtom(wasm, loc.index);
} else wasm.symbol_atom.get(loc) orelse return;
const atom_index = if (wasm.file(loc.file)) |obj_file|
try obj_file.parseSymbolIntoAtom(wasm, loc.index)
else
wasm.symbol_atom.get(loc) orelse return;
const atom = wasm.getAtom(atom_index);
for (atom.relocs.items) |reloc| {

View file

@ -59,10 +59,7 @@ pub fn format(atom: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptio
/// Returns the location of the symbol that represents this `Atom`
pub fn symbolLoc(atom: Atom) Wasm.SymbolLoc {
if (atom.file == .null) {
return .{ .file = null, .index = atom.sym_index };
}
return .{ .file = @intFromEnum(atom.file), .index = atom.sym_index };
return .{ .file = atom.file, .index = atom.sym_index };
}
pub fn getSymbolIndex(atom: Atom) ?u32 {
@ -83,7 +80,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
for (atom.relocs.items) |reloc| {
const value = atom.relocationValue(reloc, wasm_bin);
log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{
(Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = reloc.index }).getName(wasm_bin),
(Wasm.SymbolLoc{ .file = atom.file, .index = reloc.index }).getName(wasm_bin),
symbol_name,
reloc.offset,
value,
@ -122,11 +119,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
/// All values will be represented as a `u64` as all values can fit within it.
/// The final value must be casted to the correct size.
fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 {
const target_loc = if (atom.file == .null)
(Wasm.SymbolLoc{ .file = null, .index = relocation.index }).finalLoc(wasm_bin)
else
(Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = relocation.index }).finalLoc(wasm_bin);
const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = relocation.index }).finalLoc(wasm_bin);
const symbol = target_loc.getSymbol(wasm_bin);
if (relocation.relocation_type != .R_WASM_TYPE_INDEX_LEB and
symbol.tag != .section and
@ -142,7 +135,7 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa
.R_WASM_TABLE_INDEX_I64,
.R_WASM_TABLE_INDEX_SLEB,
.R_WASM_TABLE_INDEX_SLEB64,
=> return wasm_bin.function_table.get(.{ .file = @intFromEnum(atom.file), .index = relocation.index }) orelse 0,
=> return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0,
.R_WASM_TYPE_INDEX_LEB => {
const obj_file = wasm_bin.file(atom.file) orelse return relocation.index;
const original_type = obj_file.funcTypes()[relocation.index];

View file

@ -952,7 +952,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato
.R_WASM_TABLE_INDEX_SLEB64,
=> {
try wasm.function_table.put(gpa, .{
.file = @intFromEnum(object.index),
.file = object.index,
.index = reloc.index,
}, 0);
},
@ -961,10 +961,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato
=> {
const sym = object.symtable[reloc.index];
if (sym.tag != .global) {
try wasm.got_symbols.append(
gpa,
.{ .file = @intFromEnum(object.index), .index = reloc.index },
);
try wasm.got_symbols.append(gpa, .{ .file = object.index, .index = reloc.index });
}
},
else => {},

View file

@ -468,7 +468,7 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 {
fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void {
const symbol_index = zig_object.error_table_symbol orelse return;
const gpa = wasm_file.base.comp.gpa;
const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = symbol_index }).?;
const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = symbol_index }).?;
// Rather than creating a symbol for each individual error name,
// we create a symbol for the entire region of error names. We then calculate
@ -633,7 +633,7 @@ pub fn getDeclVAddr(
const target_symbol_index = wasm_file.getAtom(target_atom_index).sym_index;
std.debug.assert(reloc_info.parent_atom_index != 0);
const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?;
const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?;
const atom = wasm_file.getAtomPtr(atom_index);
const is_wasm32 = target.cpu.arch == .wasm32;
if (decl.ty.zigTypeTag(mod) == .Fn) {
@ -670,7 +670,7 @@ pub fn getAnonDeclVAddr(
const atom_index = zig_object.anon_decls.get(decl_val).?;
const target_symbol_index = wasm_file.getAtom(atom_index).getSymbolIndex().?;
const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?;
const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?;
const parent_atom = wasm_file.getAtomPtr(parent_atom_index);
const is_wasm32 = target.cpu.arch == .wasm32;
const mod = wasm_file.base.comp.module.?;
@ -705,7 +705,7 @@ pub fn deleteDeclExport(
) void {
const atom_index = zig_object.decls.get(decl_index) orelse return;
const sym_index = wasm_file.getAtom(atom_index).sym_index;
const loc: Wasm.SymbolLoc = .{ .file = null, .index = sym_index };
const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = sym_index };
const sym = loc.getSymbol(wasm_file);
std.debug.assert(zig_object.global_syms.remove(sym.name));
}
@ -1161,7 +1161,7 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index:
/// its relocations and create any GOT symbols or function table indexes it may require.
pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) !Atom.Index {
const gpa = wasm_file.base.comp.gpa;
const loc: Wasm.SymbolLoc = .{ .file = @intFromEnum(zig_object.index), .index = index };
const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = index };
const final_index = try wasm_file.getMatchingSegment(zig_object.index, index);
const atom_index = wasm_file.symbol_atom.get(loc).?;
try wasm_file.appendAtomAtIndex(final_index, atom_index);