zig/src/link/SpirV/BinaryModule.zig
2025-07-14 15:16:17 +02:00

464 lines
18 KiB
Zig

const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.spirv_parse);
const spec = @import("../../codegen/spirv/spec.zig");
const Opcode = spec.Opcode;
const Word = spec.Word;
const InstructionSet = spec.InstructionSet;
const ResultId = spec.Id;
const BinaryModule = @This();
pub const header_words = 5;
/// The module SPIR-V version.
version: spec.Version,
/// The generator magic number.
generator_magic: u32,
/// The result-id bound of this SPIR-V module.
id_bound: u32,
/// The instructions of this module. This does not contain the header.
instructions: []const Word,
/// Maps OpExtInstImport result-ids to their InstructionSet.
ext_inst_map: std.AutoHashMapUnmanaged(ResultId, InstructionSet),
/// This map contains the width of arithmetic types (OpTypeInt and
/// OpTypeFloat). We need this information to correctly parse the operands
/// of Op(Spec)Constant and OpSwitch.
arith_type_width: std.AutoHashMapUnmanaged(ResultId, u16),
/// The starting offsets of some sections.
/// Offsets are word offsets into `instructions` (the header is not counted).
sections: struct {
/// Offset of the first OpFunction instruction found while parsing, or
/// `instructions.len` when the module contains no OpFunction at all.
functions: usize,
},
/// Frees the lookup tables owned by this module and invalidates `self`.
/// `a` must be the allocator that was used to populate the tables.
/// The `instructions` slice is not freed here.
pub fn deinit(self: *BinaryModule, a: Allocator) void {
    // Poison the whole struct once both tables are gone.
    defer self.* = undefined;
    self.arith_type_width.deinit(a);
    self.ext_inst_map.deinit(a);
}
/// Returns an iterator over every instruction in the module, starting
/// at the first instruction after the header.
pub fn iterateInstructions(self: BinaryModule) Instruction.Iterator {
    return self.iterateInstructionsFrom(0);
}
/// Returns an iterator that starts at word `offset` of `instructions`.
/// `offset` is expected to be the start of an instruction.
pub fn iterateInstructionsFrom(self: BinaryModule, offset: usize) Instruction.Iterator {
    const words = self.instructions;
    return .init(words, offset);
}
/// Decodes the single instruction that starts at word `offset`.
/// `offset` must lie inside `instructions`; anything else is a
/// programming error (the optional is unwrapped unconditionally).
pub fn instructionAt(self: BinaryModule, offset: usize) Instruction {
    var cursor: Instruction.Iterator = .init(self.instructions, offset);
    return cursor.next() orelse unreachable;
}
/// Serializes the module back into a complete SPIR-V word stream:
/// the `header_words`-word header followed by `instructions`.
///
/// The generator word is always written as `spec.zig_generator_id`, not the
/// `generator_magic` the module was parsed with, and the schema word is 0.
///
/// The returned slice is allocated with `a`; the caller owns it and must
/// free it. Fails only when the allocation fails.
pub fn finalize(self: BinaryModule, a: Allocator) ![]Word {
    // Nothing after this allocation can fail, so no errdefer is required.
    const result = try a.alloc(Word, header_words + self.instructions.len);

    result[0] = spec.magic_number;
    result[1] = @bitCast(self.version);
    result[2] = spec.zig_generator_id;
    result[3] = self.id_bound;
    result[4] = 0; // Schema

    // Use the shared constant rather than a literal so the header size is
    // defined in exactly one place.
    @memcpy(result[header_words..], self.instructions);
    return result;
}
/// Errors that can be raised when the module is not correct.
/// Note that the parser doesn't validate SPIR-V modules by a
/// long shot. It only yields errors that critically prevent
/// further analysis of the module.
/// This is the declared error set of `Parser.parse`.
pub const ParseError = error{
/// Raised when the module doesn't start with the SPIR-V magic.
/// This usually means that the module isn't actually SPIR-V.
InvalidMagic,
/// Raised when the module has an invalid "physical" format:
/// For example when the header is incomplete, or an instruction
/// has an illegal format.
/// `Parser.parse` reports this for a zero-length instruction or one
/// that extends past the end of the module.
InvalidPhysicalFormat,
/// OpExtInstImport was used with an unknown extension string.
/// Importing the core instruction set this way is rejected as well.
InvalidExtInstImport,
/// The module had an instruction with an invalid (unknown) opcode.
InvalidOpcode,
/// An instruction's operands did not conform to the SPIR-V specification
/// for that instruction.
/// `Parser.parse` reports this when an OpTypeInt/OpTypeFloat width does not
/// fit in 16 bits, or when an instruction that declares a result type and
/// result-id has fewer than two operands.
InvalidOperands,
/// A result-id was declared more than once.
/// `Parser.parse` detects this through the `arith_type_width` table.
DuplicateId,
/// Some ID did not resolve.
InvalidId,
/// This opcode or instruction is not supported yet.
/// NOTE(review): no code visible in this file returns this error; confirm
/// whether other users of this error set rely on it.
UnsupportedOperation,
/// Parser ran out of memory.
OutOfMemory,
};
/// A single decoded SPIR-V instruction, viewed in place inside a word slice.
pub const Instruction = struct {
    /// Walks a word slice one instruction at a time. The words are assumed
    /// to be structurally valid (non-zero word counts that stay in bounds);
    /// violations are caught by assertions and slice bounds checks only.
    pub const Iterator = struct {
        /// The words being iterated.
        words: []const Word,
        /// Number of instructions yielded so far.
        index: usize = 0,
        /// Word offset of the next instruction to yield.
        offset: usize = 0,

        /// Creates an iterator over `words` that begins at `start_offset`.
        pub fn init(words: []const Word, start_offset: usize) Iterator {
            return Iterator{
                .words = words,
                .index = 0,
                .offset = start_offset,
            };
        }

        /// Yields the instruction at the current offset and advances past
        /// it, or returns null once the offset reaches the end of `words`.
        pub fn next(self: *Iterator) ?Instruction {
            const start = self.offset;
            if (start >= self.words.len) return null;

            // The first word packs the word count (high 16 bits) and the
            // opcode (low 16 bits).
            const header = self.words[start];
            const word_count = header >> 16;
            assert(word_count != 0);
            assert(start < self.words.len);

            const inst: Instruction = .{
                .opcode = @enumFromInt(header & 0xFFFF),
                .index = self.index,
                .offset = start,
                .operands = self.words[start..][1..word_count],
            };

            self.index += 1;
            self.offset = start + word_count;
            return inst;
        }
    };

    /// The opcode for this instruction.
    opcode: Opcode,
    /// The instruction's index.
    index: usize,
    /// The instruction's word offset in the module.
    offset: usize,
    /// The raw (unparsed) operands for this instruction.
    operands: []const Word,
};
/// This parser contains information (acceleration tables)
/// that can be persisted across different modules. This is
/// used to initialize the module, and is also used when
/// further analyzing it.
pub const Parser = struct {
/// The allocator used to allocate this parser's structures,
/// and also the structures of any parsed module.
a: Allocator,
/// Maps (instruction set, opcode) => instruction index (for instruction set).
/// Keys are produced by `mapSetAndOpcode` (set tag in the high 16 bits, opcode
/// in the low 16 bits); values index into that set's `instructions()` table.
/// When several instructions of a set share an opcode, the first one wins.
opcode_table: std.AutoHashMapUnmanaged(u32, u16) = .empty,
/// Builds a parser and fills its opcode lookup table from every known
/// instruction set. The table is allocated with `a`; release it with
/// `deinit`. Fails only when allocation fails.
pub fn init(a: Allocator) !Parser {
    var parser: Parser = .{ .a = a };
    errdefer parser.deinit();

    inline for (std.meta.tags(InstructionSet)) |set| {
        const set_instructions = set.instructions();
        try parser.opcode_table.ensureUnusedCapacity(a, @intCast(set_instructions.len));

        for (set_instructions, 0..) |inst, inst_index| {
            const key = mapSetAndOpcode(set, @intCast(inst.opcode));
            const slot = parser.opcode_table.getOrPutAssumeCapacity(key);
            // Aliased instructions share an opcode and (should) share their
            // operand layout, so whichever comes first is kept. That is
            // usually the core, KHR or EXT variant.
            if (slot.found_existing) continue;
            slot.value_ptr.* = @intCast(inst_index);
        }
    }

    return parser;
}
/// Frees the opcode lookup table using the allocator stored in the parser.
pub fn deinit(self: *Parser) void {
    const gpa = self.a;
    self.opcode_table.deinit(gpa);
}
/// Packs an instruction set and an opcode into one `opcode_table` key:
/// the set's tag value goes in the high 16 bits, the opcode in the low 16.
fn mapSetAndOpcode(set: InstructionSet, opcode: u16) u32 {
    const set_bits: u32 = @intFromEnum(set);
    const opcode_bits: u32 = opcode;
    return (set_bits << 16) | opcode_bits;
}
/// Looks up the core instruction-set description for `opcode`.
/// Returns null when the opcode is not present in the lookup table.
pub fn getInstSpec(self: Parser, opcode: Opcode) ?spec.Instruction {
    const key = mapSetAndOpcode(.core, @intFromEnum(opcode));
    if (self.opcode_table.get(key)) |index| {
        return InstructionSet.core.instructions()[index];
    }
    return null;
}
/// Parses the words of a SPIR-V module into a `BinaryModule`.
///
/// Only the structure needed for later analysis is checked: the header,
/// the word count and opcode of every instruction, and the operands of the
/// few instructions that feed the module's lookup tables.
///
/// The returned module borrows `module` (its `instructions` field is a
/// sub-slice of it), so `module` must outlive the result. The lookup tables
/// are allocated with the parser's allocator and are released by
/// `BinaryModule.deinit`; on error they are released before returning.
pub fn parse(self: *Parser, module: []const u32) ParseError!BinaryModule {
    // An empty slice has no word 0 to inspect; skip the magic check for it
    // and let the length check below reject it. Non-empty inputs keep the
    // original precedence: a bad magic is reported before a short header.
    if (module.len != 0 and module[0] != spec.magic_number) {
        return error.InvalidMagic;
    }
    if (module.len < header_words) {
        log.err("module only has {}/{} header words", .{ module.len, header_words });
        return error.InvalidPhysicalFormat;
    }

    var binary = BinaryModule{
        .version = @bitCast(module[1]),
        .generator_magic = module[2],
        .id_bound = module[3],
        .instructions = module[header_words..],
        .ext_inst_map = .empty,
        .arith_type_width = .empty,
        .sections = undefined,
    };
    // Every error return below would otherwise leak the two tables.
    errdefer binary.deinit(self.a);

    var maybe_function_section: ?usize = null;

    // First pass through the module to verify basic structure and
    // to gather some initial stuff for more detailed analysis.
    // We want to check some stuff that Instruction.Iterator is no good for,
    // so just iterate manually.
    var offset: usize = 0;
    while (offset < binary.instructions.len) {
        const len = binary.instructions[offset] >> 16;
        if (len == 0 or len + offset > binary.instructions.len) {
            log.err("invalid instruction format: len={}, end={}, module len={}", .{ len, len + offset, binary.instructions.len });
            return error.InvalidPhysicalFormat;
        }
        defer offset += len;

        // We can't really efficiently use non-exhaustive enums here, because we would
        // need to manually write out all valid cases. Since we have this map anyway, just
        // use that.
        const opcode: Opcode = @enumFromInt(@as(u16, @truncate(binary.instructions[offset])));
        const inst_spec = self.getInstSpec(opcode) orelse {
            log.err("invalid opcode for core set: {}", .{@intFromEnum(opcode)});
            return error.InvalidOpcode;
        };

        const operands = binary.instructions[offset..][1..len];
        switch (opcode) {
            .OpExtInstImport => {
                // operands[0] is the result-id; the name string follows.
                if (operands.len < 1) return error.InvalidOperands;
                const set_name = std.mem.sliceTo(std.mem.sliceAsBytes(operands[1..]), 0);
                const set = std.meta.stringToEnum(InstructionSet, set_name) orelse {
                    log.err("invalid instruction set '{s}'", .{set_name});
                    return error.InvalidExtInstImport;
                };
                if (set == .core) return error.InvalidExtInstImport;
                try binary.ext_inst_map.put(self.a, @enumFromInt(operands[0]), set);
            },
            .OpTypeInt, .OpTypeFloat => {
                // operands[0] is the result-id, operands[1] the bit width.
                if (operands.len < 2) return error.InvalidOperands;
                const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[0]));
                if (entry.found_existing) return error.DuplicateId;
                entry.value_ptr.* = std.math.cast(u16, operands[1]) orelse return error.InvalidOperands;
            },
            .OpFunction => if (maybe_function_section == null) {
                maybe_function_section = offset;
            },
            else => {},
        }

        // OpSwitch takes a value as argument, not an OpType... hence we need to populate arith_type_width
        // with ALL operations that return an int or float.
        const spec_operands = inst_spec.operands;
        if (spec_operands.len >= 2 and
            spec_operands[0].kind == .id_result_type and
            spec_operands[1].kind == .id_result)
        {
            if (operands.len < 2) return error.InvalidOperands;
            if (binary.arith_type_width.get(@enumFromInt(operands[0]))) |width| {
                const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[1]));
                if (entry.found_existing) return error.DuplicateId;
                entry.value_ptr.* = width;
            }
        }
    }

    binary.sections = .{
        .functions = maybe_function_section orelse binary.instructions.len,
    };

    return binary;
}
/// Parse offsets in the instruction that contain result-ids.
/// Returned offsets are relative to inst.operands.
/// Returns in an arraylist to amortize allocations: offsets are appended
/// to `offsets`, which is never cleared here.
///
/// `inst` must come from a module accepted by `parse`, so its opcode is
/// known to the core instruction table. Operand contents are still treated
/// as untrusted: an operand list that does not match the instruction's
/// layout yields `error.InvalidPhysicalFormat`, and an unresolved
/// instruction-set or type id yields `error.InvalidId`. `offsets` may
/// already have been extended when an error is returned.
pub fn parseInstructionResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    offsets: *std.ArrayList(u16),
) !void {
    const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(inst.opcode))).?;
    const operands = InstructionSet.core.instructions()[index].operands;

    var offset: usize = 0;
    switch (inst.opcode) {
        .OpSpecConstantOp => {
            assert(operands[0].kind == .id_result_type);
            assert(operands[1].kind == .id_result);
            offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);

            if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
            const spec_opcode = std.math.cast(u16, inst.operands[offset]) orelse return error.InvalidPhysicalFormat;
            const spec_index = self.opcode_table.get(mapSetAndOpcode(.core, spec_opcode)) orelse
                return error.InvalidPhysicalFormat;
            const spec_operands = InstructionSet.core.instructions()[spec_index].operands;
            // The wrapped opcode is read from the module, so it may name an
            // instruction that has no result type / result-id. Reject that
            // instead of asserting (or indexing out of bounds) on it.
            if (spec_operands.len < 2 or
                spec_operands[0].kind != .id_result_type or
                spec_operands[1].kind != .id_result)
            {
                return error.InvalidPhysicalFormat;
            }
            // The wrapped instruction's own result type and result-id are
            // not repeated; only its remaining operands follow the opcode.
            offset = try self.parseOperandsResultIds(binary, inst, spec_operands[2..], offset + 1, offsets);
        },
        .OpExtInst => {
            assert(operands[0].kind == .id_result_type);
            assert(operands[1].kind == .id_result);
            offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);

            // Need both the set id and the extended opcode.
            if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
            const set_id: ResultId = @enumFromInt(inst.operands[offset]);
            try offsets.append(@intCast(offset));

            const set = binary.ext_inst_map.get(set_id) orelse {
                log.err("invalid instruction set {}", .{@intFromEnum(set_id)});
                return error.InvalidId;
            };
            const ext_opcode = std.math.cast(u16, inst.operands[offset + 1]) orelse return error.InvalidPhysicalFormat;
            const ext_index = self.opcode_table.get(mapSetAndOpcode(set, ext_opcode)) orelse
                return error.InvalidPhysicalFormat;
            const ext_operands = set.instructions()[ext_index].operands;
            offset = try self.parseOperandsResultIds(binary, inst, ext_operands, offset + 2, offsets);
        },
        else => {
            offset = try self.parseOperandsResultIds(binary, inst, operands, offset, offsets);
        },
    }

    // Every operand word must have been accounted for.
    if (offset != inst.operands.len) return error.InvalidPhysicalFormat;
}
/// Runs `parseOperandResultIds` over each entry of `operands` in order,
/// threading the operand-word offset through, and returns the offset
/// reached after the last one.
fn parseOperandsResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    operands: []const spec.Operand,
    start_offset: usize,
    offsets: *std.ArrayList(u16),
) !usize {
    var cursor = start_offset;
    var i: usize = 0;
    while (i < operands.len) : (i += 1) {
        cursor = try self.parseOperandResultIds(binary, inst, operands[i], cursor, offsets);
    }
    return cursor;
}
/// Applies the quantifier of one operand description: a required operand
/// is parsed exactly once, an optional one only when operand words remain,
/// and a variadic one repeatedly until the operand words are used up.
/// Returns the offset of the first word that was not consumed.
fn parseOperandResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    operand: spec.Operand,
    start_offset: usize,
    offsets: *std.ArrayList(u16),
) !usize {
    const kind = operand.kind;
    const word_count = inst.operands.len;

    switch (operand.quantifier) {
        .required => return self.parseOperandKindResultIds(binary, inst, kind, start_offset, offsets),
        .optional => {
            if (start_offset >= word_count) return start_offset;
            return self.parseOperandKindResultIds(binary, inst, kind, start_offset, offsets);
        },
        .variadic => {
            var cursor = start_offset;
            while (cursor < word_count) {
                cursor = try self.parseOperandKindResultIds(binary, inst, kind, cursor, offsets);
            }
            return cursor;
        },
    }
}
/// Returns how many operand words a literal occupies when its type is
/// `bit_width` bits wide. Widths of 0 or above 64 come straight from the
/// module and cannot be sized, so they are reported as an error.
fn literalWordCount(bit_width: u16) error{InvalidPhysicalFormat}!usize {
    return switch (bit_width) {
        1...32 => 1,
        33...64 => 2,
        else => {
            log.err("unsupported literal bit width {}", .{bit_width});
            return error.InvalidPhysicalFormat;
        },
    };
}

/// Parses a single operand of kind `kind` starting at `start_offset`,
/// appending the offset of every result-id it contains to `offsets`.
/// Returns the offset of the first operand word after this operand.
///
/// Enum operands are followed by the parameters of the selected
/// enumerant(s), which are parsed recursively. Operand data comes from the
/// module and is untrusted, so layouts that cannot be sized or that run off
/// the end of the instruction yield `error.InvalidPhysicalFormat`; a type or
/// selector id missing from `arith_type_width` yields `error.InvalidId`.
fn parseOperandKindResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    kind: spec.OperandKind,
    start_offset: usize,
    offsets: *std.ArrayList(u16),
) !usize {
    var offset = start_offset;
    if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;

    switch (kind.category()) {
        .bit_enum => {
            const mask = inst.operands[offset];
            offset += 1;
            // Every set bit may bring its own parameters, in enumerant order.
            for (kind.enumerants()) |enumerant| {
                if ((mask & enumerant.value) != 0) {
                    for (enumerant.parameters) |param_kind| {
                        offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
                    }
                }
            }
        },
        .value_enum => {
            const value = inst.operands[offset];
            offset += 1;
            for (kind.enumerants()) |enumerant| {
                if (value == enumerant.value) {
                    for (enumerant.parameters) |param_kind| {
                        offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
                    }
                    break;
                }
            }
        },
        .id => {
            try offsets.append(@intCast(offset));
            offset += 1;
        },
        else => switch (kind) {
            .literal_integer, .literal_float => offset += 1,
            .literal_string => while (true) {
                if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
                const word = inst.operands[offset];
                offset += 1;

                // The string ends with the first word that holds a zero byte.
                if (word & 0xFF000000 == 0 or
                    word & 0x00FF0000 == 0 or
                    word & 0x0000FF00 == 0 or
                    word & 0x000000FF == 0)
                {
                    break;
                }
            },
            .literal_context_dependent_number => {
                // This kind belongs to OpConstant and OpSpecConstant. The
                // original check named OpSpecConstantOp instead of
                // OpSpecConstant; OpSpecConstantOp stays accepted so nothing
                // that passed before is rejected now.
                assert(inst.opcode == .OpConstant or
                    inst.opcode == .OpSpecConstant or
                    inst.opcode == .OpSpecConstantOp);
                // operands[0] is the result type, which fixes the literal's width.
                const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
                    log.err("invalid LiteralContextDependentNumber type {}", .{inst.operands[0]});
                    return error.InvalidId;
                };
                offset += try literalWordCount(bit_width);
            },
            // These are consumed by the OpExtInst / OpSpecConstantOp handling
            // in parseInstructionResultIds. They only show up here when a
            // malformed OpSpecConstantOp wraps one of those opcodes.
            .literal_ext_inst_integer,
            .literal_spec_constant_op_integer,
            => return error.InvalidPhysicalFormat,
            .pair_literal_integer_id_ref => { // Switch case
                // Only OpSwitch has its selector in operands[0]; any other
                // opcode reaching this point is a malformed wrapped instruction.
                if (inst.opcode != .OpSwitch) return error.InvalidPhysicalFormat;
                const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
                    log.err("invalid OpSwitch type {}", .{inst.operands[0]});
                    return error.InvalidId;
                };
                offset += try literalWordCount(bit_width);
                // The target label must still be inside the instruction.
                if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
                try offsets.append(@intCast(offset));
                offset += 1;
            },
            .pair_id_ref_literal_integer => {
                if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
                try offsets.append(@intCast(offset));
                offset += 2;
            },
            .pair_id_ref_id_ref => {
                if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
                try offsets.append(@intCast(offset));
                try offsets.append(@intCast(offset + 1));
                offset += 2;
            },
            else => unreachable,
        },
    }
    return offset;
}
};