// Mirror of https://codeberg.org/ziglang/zig.git (synced 2025-12-06 05:44:20 +00:00).
// 464 lines, 18 KiB, Zig.
const std = @import("std");
|
|
const assert = std.debug.assert;
|
|
const Allocator = std.mem.Allocator;
|
|
const log = std.log.scoped(.spirv_parse);
|
|
|
|
const spec = @import("../../codegen/spirv/spec.zig");
|
|
const Opcode = spec.Opcode;
|
|
const Word = spec.Word;
|
|
const InstructionSet = spec.InstructionSet;
|
|
const ResultId = spec.Id;
|
|
|
|
const BinaryModule = @This();
|
|
|
|
/// Number of words that precede the first instruction in a module.
/// `Parser.parse` reads the magic, version, generator and id bound from the
/// first four of them and starts the instruction stream right after.
pub const header_words = 5;
|
|
|
|
/// SPIR-V version, taken from the second header word.
version: spec.Version,

/// Generator magic number, taken from the third header word.
generator_magic: u32,

/// Result-id bound of this module, taken from the fourth header word.
id_bound: u32,

/// All instruction words of the module; the header is not part of this slice.
instructions: []const Word,

/// For every OpExtInstImport in the module: its result-id and the
/// InstructionSet that its name resolved to.
ext_inst_map: std.AutoHashMapUnmanaged(ResultId, InstructionSet),

/// Bit width of arithmetic types (OpTypeInt and OpTypeFloat), keyed by
/// result-id. Results whose result type is already in this map are recorded
/// with the same width. The widths are required to decode the operands of
/// Op(Spec)Constant and OpSwitch.
arith_type_width: std.AutoHashMapUnmanaged(ResultId, u16),

/// Word offsets (into `instructions`) at which certain sections begin.
sections: struct {
    /// Offset of the first OpFunction, or `instructions.len` when the
    /// module contains none.
    functions: usize,
},
|
|
|
|
/// Frees both lookup tables of this module using `a` and then invalidates
/// the module. The instruction words themselves are not freed here.
pub fn deinit(self: *BinaryModule, a: Allocator) void {
    self.arith_type_width.deinit(a);
    self.ext_inst_map.deinit(a);
    self.* = undefined;
}
|
|
|
|
/// Returns an iterator positioned at the first instruction of the module.
pub fn iterateInstructions(self: BinaryModule) Instruction.Iterator {
    const first_word: usize = 0;
    return Instruction.Iterator.init(self.instructions, first_word);
}
|
|
|
|
/// Returns an iterator that starts at word `offset` of the instruction
/// stream. `offset` is expected to point at the first word of an instruction.
pub fn iterateInstructionsFrom(self: BinaryModule, offset: usize) Instruction.Iterator {
    const words = self.instructions;
    return Instruction.Iterator.init(words, offset);
}
|
|
|
|
/// Decodes the single instruction that starts at word `offset`.
/// `offset` must lie inside the instruction stream; otherwise there is no
/// instruction to return and the unwrap below fails.
pub fn instructionAt(self: BinaryModule, offset: usize) Instruction {
    var iterator = Instruction.Iterator.init(self.instructions, offset);
    const maybe_instruction = iterator.next();
    return maybe_instruction.?;
}
|
|
|
|
/// Serializes this module into a freshly allocated word buffer: a header
/// of `header_words` words followed by a copy of `instructions`.
///
/// The generator word is always written as `spec.zig_generator_id`; the
/// `generator_magic` field is not used here. The schema word is written as 0.
///
/// The caller owns the returned slice and frees it with `a`.
/// Fails only when the allocation fails.
pub fn finalize(self: BinaryModule, a: Allocator) ![]Word {
    // Size the buffer from the shared header constant so that the header
    // layout is defined in exactly one place.
    const result = try a.alloc(Word, header_words + self.instructions.len);
    errdefer a.free(result);

    result[0] = spec.magic_number;
    result[1] = @bitCast(self.version);
    result[2] = spec.zig_generator_id;
    result[3] = self.id_bound;
    result[4] = 0; // Schema

    @memcpy(result[header_words..], self.instructions);
    return result;
}
|
|
|
|
/// Everything that can go wrong while parsing a module.
/// The parser is nowhere near a full SPIR-V validator: it only reports
/// problems that make further analysis of the module impossible.
pub const ParseError = error{
    /// The first word is not the SPIR-V magic number. In most cases this
    /// means the input is not SPIR-V at all.
    InvalidMagic,
    /// The "physical" layout is broken, for instance a truncated header
    /// or an instruction whose encoding is illegal.
    InvalidPhysicalFormat,
    /// An OpExtInstImport names an extension string that is not known.
    InvalidExtInstImport,
    /// An instruction carries an opcode that is invalid (unknown).
    InvalidOpcode,
    /// The operands of an instruction do not match what the SPIR-V
    /// specification prescribes for it.
    InvalidOperands,
    /// The same result-id was declared more than once.
    DuplicateId,
    /// An ID could not be resolved.
    InvalidId,
    /// The opcode or instruction is not supported yet.
    UnsupportedOperation,
    /// The parser ran out of memory.
    OutOfMemory,
};
|
|
|
|
/// A decoded view of a single instruction inside a stream of SPIR-V words.
pub const Instruction = struct {
    /// Steps through a slice of words one instruction at a time.
    pub const Iterator = struct {
        /// The words being walked.
        words: []const Word,
        /// How many instructions this iterator has produced so far.
        index: usize = 0,
        /// Position in `words` of the next instruction's first word.
        offset: usize = 0,

        /// Creates an iterator over `words` that begins at `start_offset`.
        /// The instruction counter starts at zero regardless of the offset.
        pub fn init(words: []const Word, start_offset: usize) Iterator {
            return .{ .words = words, .offset = start_offset };
        }

        /// Yields the instruction at the current position and moves on to
        /// the one after it, or yields null once the end is reached.
        /// The word count stored in the instruction must be non-zero.
        pub fn next(self: *Iterator) ?Instruction {
            if (self.offset >= self.words.len) return null;

            // High half of the first word: total word count.
            // Low half: the opcode.
            const first_word = self.words[self.offset];
            const word_count = first_word >> 16;
            assert(word_count != 0);

            const current: Instruction = .{
                .opcode = @enumFromInt(first_word & 0xFFFF),
                .index = self.index,
                .offset = self.offset,
                .operands = self.words[self.offset..][1..word_count],
            };

            self.index += 1;
            self.offset += word_count;
            return current;
        }
    };

    /// Opcode of the instruction.
    opcode: Opcode,
    /// Position of the instruction in iteration order, as counted by the
    /// iterator that produced it.
    index: usize,
    /// Word offset of the instruction's first word within the iterated words.
    offset: usize,
    /// The operand words, raw and unparsed; the first instruction word is
    /// not included.
    operands: []const Word,
};
|
|
|
|
/// This parser contains information (acceleration tables)
|
|
/// that can be persisted across different modules. This is
|
|
/// used to initialize the module, and is also used when
|
|
/// further analyzing it.
|
|
pub const Parser = struct {
|
|
/// The allocator used to allocate this parser's structures,
|
|
/// and also the structures of any parsed module.
|
|
a: Allocator,
|
|
|
|
/// Maps (instruction set, opcode) => instruction index (for instruction set)
|
|
opcode_table: std.AutoHashMapUnmanaged(u32, u16) = .empty,
|
|
|
|
/// Creates a parser and fills its opcode table with one entry per
/// (instruction set, opcode) pair of every known instruction set.
/// Fails when the table cannot be allocated; nothing is leaked in that case.
pub fn init(a: Allocator) !Parser {
    var parser: Parser = .{ .a = a };
    errdefer parser.deinit();

    inline for (std.meta.tags(InstructionSet)) |set| {
        const set_instructions = set.instructions();
        try parser.opcode_table.ensureUnusedCapacity(a, @intCast(set_instructions.len));

        for (set_instructions, 0..) |inst, inst_index| {
            // Several instructions may share one opcode (aliases). Which of
            // them ends up in the table is not important, since they all
            // (should) take the same operands. The first one wins, and that
            // is usually the core, KHR or EXT variant.
            const key = mapSetAndOpcode(set, @intCast(inst.opcode));
            const slot = parser.opcode_table.getOrPutAssumeCapacity(key);
            if (slot.found_existing) continue;
            slot.value_ptr.* = @intCast(inst_index);
        }
    }

    return parser;
}
|
|
|
|
/// Frees the opcode table with the allocator this parser was created with.
pub fn deinit(self: *Parser) void {
    const allocator = self.a;
    self.opcode_table.deinit(allocator);
}
|
|
|
|
/// Builds the opcode table key for an instruction: the instruction set's
/// integer value in the upper 16 bits and the opcode in the lower 16 bits.
fn mapSetAndOpcode(set: InstructionSet, opcode: u16) u32 {
    const set_bits: u32 = @intFromEnum(set);
    return (set_bits << 16) | opcode;
}
|
|
|
|
/// Looks up the specification of a core instruction by opcode.
/// Returns null when the core instruction set has no such opcode.
pub fn getInstSpec(self: Parser, opcode: Opcode) ?spec.Instruction {
    const key = mapSetAndOpcode(.core, @intFromEnum(opcode));
    if (self.opcode_table.get(key)) |index| {
        return InstructionSet.core.instructions()[index];
    }
    return null;
}
|
|
|
|
/// Parses a raw SPIR-V word stream into a `BinaryModule`.
///
/// `module` is the complete module including its header. The returned
/// module's `instructions` is a sub-slice of `module` rather than a copy, so
/// `module` has to outlive it. The lookup tables of the returned module are
/// allocated with `self.a` and are released with `BinaryModule.deinit`.
///
/// Only the basic structure is checked; see `ParseError` for what is
/// reported. When an error is returned, everything allocated up to that
/// point has been freed again.
pub fn parse(self: *Parser, module: []const u32) ParseError!BinaryModule {
    // Only look at the magic when there is a first word at all, so that an
    // empty input is reported as a truncated header instead of being an
    // out-of-bounds access.
    if (module.len != 0 and module[0] != spec.magic_number) {
        return error.InvalidMagic;
    }
    if (module.len < header_words) {
        log.err("module only has {}/{} header words", .{ module.len, header_words });
        return error.InvalidPhysicalFormat;
    }

    var binary = BinaryModule{
        .version = @bitCast(module[1]),
        .generator_magic = module[2],
        .id_bound = module[3],
        .instructions = module[header_words..],
        .ext_inst_map = .{},
        .arith_type_width = .{},
        .sections = undefined,
    };
    // The maps below grow while scanning; don't leak them on an error return.
    errdefer binary.deinit(self.a);

    var maybe_function_section: ?usize = null;

    // First pass through the module to verify basic structure and
    // to gather some initial stuff for more detailed analysis.
    // We want to check some stuff that Instruction.Iterator is no good for,
    // so just iterate manually.
    var offset: usize = 0;
    while (offset < binary.instructions.len) {
        // The high 16 bits of the first word hold the instruction's word count.
        const len = binary.instructions[offset] >> 16;
        if (len == 0 or len + offset > binary.instructions.len) {
            log.err("invalid instruction format: len={}, end={}, module len={}", .{ len, len + offset, binary.instructions.len });
            return error.InvalidPhysicalFormat;
        }
        defer offset += len;

        // We can't really efficiently use non-exhaustive enums here, because we would
        // need to manually write out all valid cases. Since we have this map anyway, just
        // use that.
        const opcode: Opcode = @enumFromInt(@as(u16, @truncate(binary.instructions[offset])));
        const inst_spec = self.getInstSpec(opcode) orelse {
            log.err("invalid opcode for core set: {}", .{@intFromEnum(opcode)});
            return error.InvalidOpcode;
        };

        const operands = binary.instructions[offset..][1..len];
        switch (opcode) {
            .OpExtInstImport => {
                // Needs at least the result-id; the name follows it.
                if (operands.len < 1) return error.InvalidOperands;
                const set_name = std.mem.sliceTo(std.mem.sliceAsBytes(operands[1..]), 0);
                const set = std.meta.stringToEnum(InstructionSet, set_name) orelse {
                    log.err("invalid instruction set '{s}'", .{set_name});
                    return error.InvalidExtInstImport;
                };
                if (set == .core) return error.InvalidExtInstImport;
                try binary.ext_inst_map.put(self.a, @enumFromInt(operands[0]), set);
            },
            .OpTypeInt, .OpTypeFloat => {
                // Needs the result-id and the width.
                if (operands.len < 2) return error.InvalidOperands;
                const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[0]));
                if (entry.found_existing) return error.DuplicateId;
                entry.value_ptr.* = std.math.cast(u16, operands[1]) orelse return error.InvalidOperands;
            },
            .OpFunction => if (maybe_function_section == null) {
                maybe_function_section = offset;
            },
            else => {},
        }

        // OpSwitch takes a value as argument, not an OpType... hence we need to populate arith_type_width
        // with ALL operations that return an int or float.
        const spec_operands = inst_spec.operands;
        if (spec_operands.len >= 2 and
            spec_operands[0].kind == .id_result_type and
            spec_operands[1].kind == .id_result)
        {
            if (operands.len < 2) return error.InvalidOperands;
            if (binary.arith_type_width.get(@enumFromInt(operands[0]))) |width| {
                const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[1]));
                if (entry.found_existing) return error.DuplicateId;
                entry.value_ptr.* = width;
            }
        }
    }

    binary.sections = .{
        // Without any OpFunction the function section is empty and starts
        // at the very end of the instruction stream.
        .functions = maybe_function_section orelse binary.instructions.len,
    };

    return binary;
}
|
|
|
|
/// Finds the operands of `inst` that hold result-ids.
///
/// Every offset appended to `offsets` is an index into `inst.operands`.
/// The list belongs to the caller and is only appended to, so it can be
/// reused across calls to amortize allocations.
///
/// `inst.opcode` has to be a known core opcode; `parse` already rejects
/// modules for which that is not the case.
///
/// Returns `error.InvalidPhysicalFormat` when the operands do not match the
/// layout of the instruction, and `error.InvalidId` when an OpExtInst refers
/// to an instruction set that is not in `binary.ext_inst_map`.
pub fn parseInstructionResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    offsets: *std.ArrayList(u16),
) !void {
    const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(inst.opcode))).?;
    const operands = InstructionSet.core.instructions()[index].operands;

    var offset: usize = 0;
    switch (inst.opcode) {
        .OpSpecConstantOp => {
            assert(operands[0].kind == .id_result_type);
            assert(operands[1].kind == .id_result);
            offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);

            if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
            const spec_opcode = std.math.cast(u16, inst.operands[offset]) orelse return error.InvalidPhysicalFormat;
            const spec_index = self.opcode_table.get(mapSetAndOpcode(.core, spec_opcode)) orelse
                return error.InvalidPhysicalFormat;
            const spec_operands = InstructionSet.core.instructions()[spec_index].operands;
            // The wrapped opcode is read from the module, so an opcode that
            // has no result type and result-id is bad input rather than a
            // broken invariant. Report it instead of asserting.
            if (spec_operands.len < 2 or
                spec_operands[0].kind != .id_result_type or
                spec_operands[1].kind != .id_result)
            {
                log.err("invalid OpSpecConstantOp opcode {}", .{spec_opcode});
                return error.InvalidPhysicalFormat;
            }
            // The result type and result-id were consumed above; the remaining
            // operands follow the layout of the wrapped opcode.
            offset = try self.parseOperandsResultIds(binary, inst, spec_operands[2..], offset + 1, offsets);
        },
        .OpExtInst => {
            assert(operands[0].kind == .id_result_type);
            assert(operands[1].kind == .id_result);
            offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);

            // Both the set id and the extended opcode have to be present.
            if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
            const set_id: ResultId = @enumFromInt(inst.operands[offset]);
            try offsets.append(@intCast(offset));
            const set = binary.ext_inst_map.get(set_id) orelse {
                log.err("invalid instruction set {}", .{@intFromEnum(set_id)});
                return error.InvalidId;
            };
            const ext_opcode = std.math.cast(u16, inst.operands[offset + 1]) orelse return error.InvalidPhysicalFormat;
            const ext_index = self.opcode_table.get(mapSetAndOpcode(set, ext_opcode)) orelse
                return error.InvalidPhysicalFormat;
            const ext_operands = set.instructions()[ext_index].operands;
            offset = try self.parseOperandsResultIds(binary, inst, ext_operands, offset + 2, offsets);
        },
        else => {
            offset = try self.parseOperandsResultIds(binary, inst, operands, offset, offsets);
        },
    }

    // Every operand word has to be accounted for.
    if (offset != inst.operands.len) return error.InvalidPhysicalFormat;
}
|
|
|
|
/// Runs `parseOperandResultIds` over each entry of `operands` in order,
/// beginning at `start_offset` within `inst.operands`.
/// Returns the offset just past the last operand that was consumed.
fn parseOperandsResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    operands: []const spec.Operand,
    start_offset: usize,
    offsets: *std.ArrayList(u16),
) !usize {
    var cursor = start_offset;
    for (operands) |operand_spec| {
        const after = try self.parseOperandResultIds(binary, inst, operand_spec, cursor, offsets);
        cursor = after;
    }
    return cursor;
}
|
|
|
|
/// Consumes the words of one specified operand, honoring its quantifier:
/// a required operand is parsed exactly once, an optional one only when
/// words are left, and a variadic one repeatedly until no words are left.
/// Returns the offset just past the consumed words.
fn parseOperandResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    operand: spec.Operand,
    start_offset: usize,
    offsets: *std.ArrayList(u16),
) !usize {
    var cursor = start_offset;
    switch (operand.quantifier) {
        .required => {
            cursor = try self.parseOperandKindResultIds(binary, inst, operand.kind, cursor, offsets);
        },
        .optional => {
            if (cursor < inst.operands.len) {
                cursor = try self.parseOperandKindResultIds(binary, inst, operand.kind, cursor, offsets);
            }
        },
        .variadic => {
            while (cursor < inst.operands.len) {
                cursor = try self.parseOperandKindResultIds(binary, inst, operand.kind, cursor, offsets);
            }
        },
    }
    return cursor;
}
|
|
|
|
/// Consumes the words of a single operand of the given `kind`, starting at
/// `start_offset` within `inst.operands`, and appends the offset of every
/// id it contains to `offsets`.
///
/// Enumerants can carry parameters of their own; those are consumed
/// recursively. Literals whose size depends on a type take one or two words,
/// based on the width stored in `binary.arith_type_width` for the
/// instruction's first operand.
///
/// Returns the offset just past the consumed words. Fails with
/// `error.InvalidPhysicalFormat` when the operand is missing or malformed,
/// and with `error.InvalidId` when a required width is not known.
fn parseOperandKindResultIds(
    self: *Parser,
    binary: BinaryModule,
    inst: Instruction,
    kind: spec.OperandKind,
    start_offset: usize,
    offsets: *std.ArrayList(u16),
) !usize {
    var offset = start_offset;
    if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;

    switch (kind.category()) {
        .bit_enum => {
            const mask = inst.operands[offset];
            offset += 1;
            // Each set bit may bring its own parameters, in enumerant order.
            for (kind.enumerants()) |enumerant| {
                if ((mask & enumerant.value) != 0) {
                    for (enumerant.parameters) |param_kind| {
                        offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
                    }
                }
            }
        },
        .value_enum => {
            const value = inst.operands[offset];
            offset += 1;
            // Only the first matching enumerant contributes parameters.
            for (kind.enumerants()) |enumerant| {
                if (value == enumerant.value) {
                    for (enumerant.parameters) |param_kind| {
                        offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
                    }
                    break;
                }
            }
        },
        .id => {
            try offsets.append(@intCast(offset));
            offset += 1;
        },
        else => switch (kind) {
            .literal_integer, .literal_float => offset += 1,
            .literal_string => while (true) {
                if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
                const word = inst.operands[offset];
                offset += 1;

                // The string ends with the first word that contains a zero byte.
                if (word & 0xFF000000 == 0 or
                    word & 0x00FF0000 == 0 or
                    word & 0x0000FF00 == 0 or
                    word & 0x000000FF == 0)
                {
                    break;
                }
            },
            .literal_context_dependent_number => {
                // OpSpecConstant takes this operand kind just like OpConstant.
                // OpSpecConstantOp gets here when it wraps one of them.
                assert(inst.opcode == .OpConstant or
                    inst.opcode == .OpSpecConstant or
                    inst.opcode == .OpSpecConstantOp);
                const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
                    log.err("invalid LiteralContextDependentNumber type {}", .{inst.operands[0]});
                    return error.InvalidId;
                };
                offset += switch (bit_width) {
                    1...32 => 1,
                    33...64 => 2,
                    // The width is read from the module, so it can be anything.
                    else => {
                        log.err("unsupported LiteralContextDependentNumber width {}", .{bit_width});
                        return error.InvalidPhysicalFormat;
                    },
                };
            },
            // OpExtInst and OpSpecConstantOp are decoded by the caller, which
            // never passes these kinds for them directly. They can only show
            // up here when an OpSpecConstantOp wraps one of those two opcodes,
            // and that is malformed input.
            .literal_ext_inst_integer,
            .literal_spec_constant_op_integer,
            => return error.InvalidPhysicalFormat,
            .pair_literal_integer_id_ref => { // Switch case
                assert(inst.opcode == .OpSwitch);
                const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
                    log.err("invalid OpSwitch type {}", .{inst.operands[0]});
                    return error.InvalidId;
                };
                // Skip the case literal; the id follows it.
                offset += switch (bit_width) {
                    1...32 => 1,
                    33...64 => 2,
                    // The width is read from the module, so it can be anything.
                    else => {
                        log.err("unsupported OpSwitch selector width {}", .{bit_width});
                        return error.InvalidPhysicalFormat;
                    },
                };
                try offsets.append(@intCast(offset));
                offset += 1;
            },
            .pair_id_ref_literal_integer => {
                try offsets.append(@intCast(offset));
                offset += 2;
            },
            .pair_id_ref_id_ref => {
                try offsets.append(@intCast(offset));
                try offsets.append(@intCast(offset + 1));
                offset += 2;
            },
            else => unreachable,
        },
    }
    return offset;
}
|
|
};
|