zig/src/codegen/spirv/Assembler.zig
2025-03-19 11:45:35 +08:00

1144 lines
44 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const Assembler = @This();
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const spec = @import("spec.zig");
const Opcode = spec.Opcode;
const Word = spec.Word;
const IdRef = spec.IdRef;
const IdResult = spec.IdResult;
const StorageClass = spec.StorageClass;
const SpvModule = @import("Module.zig");
/// Represents a token in the assembly template.
const Token = struct {
tag: Tag,
start: u32,
end: u32,
const Tag = enum {
/// Returned when there was no more input to match.
eof,
/// %identifier
result_id,
/// %identifier when appearing on the LHS of an equals sign.
/// While not technically a token, its relatively easy to resolve
/// this during lexical analysis and relieves a bunch of headaches
/// during parsing.
result_id_assign,
/// Mask, int, or float. These are grouped together as some
/// SPIR-V enumerants look a bit like integers as well (for example
/// "3D"), and so it is easier to just interpret them as the expected
/// type when resolving an instruction's operands.
value,
/// An enumerant that looks like an opcode, that is, OpXxxx.
/// Not necessarily a *valid* opcode.
opcode,
/// String literals.
/// Note, this token is also returned for unterminated
/// strings. In this case the closing " is not present.
string,
/// |.
pipe,
/// =.
equals,
/// $identifier. This is used (for now) for constant values, like integers.
/// These can be used in place of a normal `value`.
placeholder,
fn name(self: Tag) []const u8 {
return switch (self) {
.eof => "<end of input>",
.result_id => "<result-id>",
.result_id_assign => "<assigned result-id>",
.value => "<value>",
.opcode => "<opcode>",
.string => "<string literal>",
.pipe => "'|'",
.equals => "'='",
.placeholder => "<placeholder>",
};
}
};
};
/// This union represents utility information for a decoded operand.
/// Note that this union only needs to maintain a minimal amount of
/// bookkeeping: these values are enough to either decode the operands
/// into a spec type, or emit it directly into its binary form.
const Operand = union(enum) {
/// Any 'simple' 32-bit value. This could be a mask or
/// enumerant, etc, depending on the operands.
value: u32,
/// An int- or float literal encoded as 1 word. This may be
/// a 32-bit literal or smaller, already in the proper format:
/// the opper bits are 0 for floats and unsigned ints, and sign-extended
/// for signed ints.
literal32: u32,
/// An int- or float literal encoded as 2 words. This may be a 33-bit
/// to 64 bit literal, already in the proper format:
/// the opper bits are 0 for floats and unsigned ints, and sign-extended
/// for signed ints.
literal64: u64,
/// A result-id which is assigned to in this instruction. If present,
/// this is the first operand of the instruction.
result_id: AsmValue.Ref,
/// A result-id which referred to (not assigned to) in this instruction.
ref_id: AsmValue.Ref,
/// Offset into `inst.string_bytes`. The string ends at the next zero-terminator.
string: u32,
};
/// A structure representing an error message that the assembler may return, when
/// the assembly source is not syntactically or semantically correct.
const ErrorMsg = struct {
/// The offset in bytes from the start of `src` that this error occured.
byte_offset: u32,
/// An explanatory error message.
/// Memory is owned by `self.gpa`. TODO: Maybe allocate this with an arena
/// allocator if it is needed elsewhere?
msg: []const u8,
};
/// Possible errors the `assemble` function may return.
const Error = error{ AssembleFail, OutOfMemory };
/// This union is used to keep track of results of spir-v instructions. This can either be just a plain
/// result-id, in the case of most instructions, or for example a type that is constructed from
/// an OpTypeXxx instruction.
const AsmValue = union(enum) {
/// The results are stored in an array hash map, and can be referred to either by name (without the %),
/// or by values of this index type.
pub const Ref = u32;
/// This result-value is the RHS of the current instruction.
just_declared,
/// This is used as placeholder for ref-ids of which the result-id is not yet known.
/// It will be further resolved at a later stage to a more concrete forward reference.
unresolved_forward_reference,
/// This result-value is a normal result produced by a different instruction.
value: IdRef,
/// This result-value represents a type registered into the module's type system.
ty: IdRef,
/// This is a pre-supplied constant integer value.
constant: u32,
/// This is a pre-supplied constant string value.
string: []const u8,
/// Retrieve the result-id of this AsmValue. Asserts that this AsmValue
/// is of a variant that allows the result to be obtained (not an unresolved
/// forward declaration, not in the process of being declared, etc).
pub fn resultId(self: AsmValue) IdRef {
return switch (self) {
.just_declared,
.unresolved_forward_reference,
// TODO: Lower this value as constant?
.constant,
.string,
=> unreachable,
.value => |result| result,
.ty => |result| result,
};
}
};
/// This map type maps results to values. Results can be addressed either by name (without the %), or by
/// AsmValue.Ref in AsmValueMap.keys/.values.
const AsmValueMap = std.StringArrayHashMapUnmanaged(AsmValue);
/// An allocator used for common allocations.
gpa: Allocator,
/// A list of errors that occured during processing the assembly.
errors: std.ArrayListUnmanaged(ErrorMsg) = .empty,
/// The source code that is being assembled.
/// This is set when calling `assemble()`.
src: []const u8 = undefined,
/// The module that this assembly is associated to.
/// Instructions like OpType*, OpDecorate, etc are emitted into this module.
spv: *SpvModule,
/// The function that the function-specific instructions should be emitted to.
func: *SpvModule.Fn,
/// `self.src` tokenized.
tokens: std.ArrayListUnmanaged(Token) = .empty,
/// The token that is next during parsing.
current_token: u32 = 0,
/// This field groups the properties of the instruction that is currently
/// being parsed or has just been parsed.
inst: struct {
/// The opcode of the current instruction.
opcode: Opcode = undefined,
/// Operands of the current instruction.
operands: std.ArrayListUnmanaged(Operand) = .empty,
/// This is where string data resides. Strings are zero-terminated.
string_bytes: std.ArrayListUnmanaged(u8) = .empty,
/// Return a reference to the result of this instruction, if any.
fn result(self: @This()) ?AsmValue.Ref {
// The result, if present, is either the first or second
// operand of an instruction.
for (self.operands.items[0..@min(self.operands.items.len, 2)]) |op| {
switch (op) {
.result_id => |index| return index,
else => {},
}
}
return null;
}
} = .{},
/// This map maps results to their tracked values.
value_map: AsmValueMap = .{},
/// This set is used to quickly transform from an opcode name to the
/// index in its instruction set. The index of the key is the
/// index in `spec.InstructionSet.core.instructions()`.
instruction_map: std.StringArrayHashMapUnmanaged(void) = .empty,
/// Free the resources owned by this assembler.
pub fn deinit(self: *Assembler) void {
for (self.errors.items) |err| {
self.gpa.free(err.msg);
}
self.tokens.deinit(self.gpa);
self.errors.deinit(self.gpa);
self.inst.operands.deinit(self.gpa);
self.inst.string_bytes.deinit(self.gpa);
self.value_map.deinit(self.gpa);
self.instruction_map.deinit(self.gpa);
}
pub fn assemble(self: *Assembler, src: []const u8) Error!void {
self.src = src;
self.errors.clearRetainingCapacity();
// Populate the opcode map if it isn't already
if (self.instruction_map.count() == 0) {
const instructions = spec.InstructionSet.core.instructions();
try self.instruction_map.ensureUnusedCapacity(self.gpa, @intCast(instructions.len));
for (spec.InstructionSet.core.instructions(), 0..) |inst, i| {
const entry = try self.instruction_map.getOrPut(self.gpa, inst.name);
assert(entry.index == i);
}
}
try self.tokenize();
while (!self.testToken(.eof)) {
try self.parseInstruction();
try self.processInstruction();
}
if (self.errors.items.len > 0)
return error.AssembleFail;
}
fn addError(self: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) !void {
const msg = try std.fmt.allocPrint(self.gpa, fmt, args);
errdefer self.gpa.free(msg);
try self.errors.append(self.gpa, .{
.byte_offset = offset,
.msg = msg,
});
}
fn fail(self: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) Error {
try self.addError(offset, fmt, args);
return error.AssembleFail;
}
fn todo(self: *Assembler, comptime fmt: []const u8, args: anytype) Error {
return self.fail(0, "todo: " ++ fmt, args);
}
/// Attempt to process the instruction currently in `self.inst`.
/// This for example emits the instruction in the module or function, or
/// records type definitions.
/// If this function returns `error.AssembleFail`, an explanatory
/// error message has already been emitted into `self.errors`.
fn processInstruction(self: *Assembler) !void {
const result: AsmValue = switch (self.inst.opcode) {
.OpEntryPoint => {
return self.fail(0, "cannot export entry points via OpEntryPoint, export the kernel using callconv(.kernel)", .{});
},
.OpCapability => {
try self.spv.addCapability(@enumFromInt(self.inst.operands.items[0].value));
return;
},
.OpExtension => {
const ext_name_offset = self.inst.operands.items[0].string;
const ext_name = std.mem.sliceTo(self.inst.string_bytes.items[ext_name_offset..], 0);
try self.spv.addExtension(ext_name);
return;
},
.OpExtInstImport => blk: {
const set_name_offset = self.inst.operands.items[1].string;
const set_name = std.mem.sliceTo(self.inst.string_bytes.items[set_name_offset..], 0);
const set_tag = std.meta.stringToEnum(spec.InstructionSet, set_name) orelse {
return self.fail(set_name_offset, "unknown instruction set: {s}", .{set_name});
};
break :blk .{ .value = try self.spv.importInstructionSet(set_tag) };
},
else => switch (self.inst.opcode.class()) {
.TypeDeclaration => try self.processTypeInstruction(),
else => if (try self.processGenericInstruction()) |result|
result
else
return,
},
};
const result_ref = self.inst.result().?;
switch (self.value_map.values()[result_ref]) {
.just_declared => self.value_map.values()[result_ref] = result,
else => {
// TODO: Improve source location.
const name = self.value_map.keys()[result_ref];
return self.fail(0, "duplicate definition of %{s}", .{name});
},
}
}
/// Record `self.inst` into the module's type system, and return the AsmValue that
/// refers to the result.
fn processTypeInstruction(self: *Assembler) !AsmValue {
const operands = self.inst.operands.items;
const section = &self.spv.sections.types_globals_constants;
const id = switch (self.inst.opcode) {
.OpTypeVoid => try self.spv.voidType(),
.OpTypeBool => try self.spv.boolType(),
.OpTypeInt => blk: {
const signedness: std.builtin.Signedness = switch (operands[2].literal32) {
0 => .unsigned,
1 => .signed,
else => {
// TODO: Improve source location.
return self.fail(0, "{} is not a valid signedness (expected 0 or 1)", .{operands[2].literal32});
},
};
const width = std.math.cast(u16, operands[1].literal32) orelse {
return self.fail(0, "int type of {} bits is too large", .{operands[1].literal32});
};
break :blk try self.spv.intType(signedness, width);
},
.OpTypeFloat => blk: {
const bits = operands[1].literal32;
switch (bits) {
16, 32, 64 => {},
else => {
return self.fail(0, "{} is not a valid bit count for floats (expected 16, 32 or 64)", .{bits});
},
}
break :blk try self.spv.floatType(@intCast(bits));
},
.OpTypeVector => blk: {
const child_type = try self.resolveRefId(operands[1].ref_id);
break :blk try self.spv.vectorType(operands[2].literal32, child_type);
},
.OpTypeArray => {
// TODO: The length of an OpTypeArray is determined by a constant (which may be a spec constant),
// and so some consideration must be taken when entering this in the type system.
return self.todo("process OpTypeArray", .{});
},
.OpTypeRuntimeArray => blk: {
const element_type = try self.resolveRefId(operands[1].ref_id);
const result_id = self.spv.allocId();
try section.emit(self.spv.gpa, .OpTypeRuntimeArray, .{
.id_result = result_id,
.element_type = element_type,
});
break :blk result_id;
},
.OpTypePointer => blk: {
const storage_class: StorageClass = @enumFromInt(operands[1].value);
const child_type = try self.resolveRefId(operands[2].ref_id);
const result_id = self.spv.allocId();
try section.emit(self.spv.gpa, .OpTypePointer, .{
.id_result = result_id,
.storage_class = storage_class,
.type = child_type,
});
break :blk result_id;
},
.OpTypeStruct => blk: {
const ids = try self.gpa.alloc(IdRef, operands[1..].len);
defer self.gpa.free(ids);
for (operands[1..], ids) |op, *id| id.* = try self.resolveRefId(op.ref_id);
const result_id = self.spv.allocId();
try self.spv.structType(result_id, ids, null);
break :blk result_id;
},
.OpTypeImage => blk: {
const sampled_type = try self.resolveRefId(operands[1].ref_id);
const result_id = self.spv.allocId();
try section.emit(self.gpa, .OpTypeImage, .{
.id_result = result_id,
.sampled_type = sampled_type,
.dim = @enumFromInt(operands[2].value),
.depth = operands[3].literal32,
.arrayed = operands[4].literal32,
.ms = operands[5].literal32,
.sampled = operands[6].literal32,
.image_format = @enumFromInt(operands[7].value),
});
break :blk result_id;
},
.OpTypeSampler => blk: {
const result_id = self.spv.allocId();
try section.emit(self.gpa, .OpTypeSampler, .{ .id_result = result_id });
break :blk result_id;
},
.OpTypeSampledImage => blk: {
const image_type = try self.resolveRefId(operands[1].ref_id);
const result_id = self.spv.allocId();
try section.emit(self.gpa, .OpTypeSampledImage, .{ .id_result = result_id, .image_type = image_type });
break :blk result_id;
},
.OpTypeFunction => blk: {
const param_operands = operands[2..];
const return_type = try self.resolveRefId(operands[1].ref_id);
const param_types = try self.spv.gpa.alloc(IdRef, param_operands.len);
defer self.spv.gpa.free(param_types);
for (param_types, param_operands) |*param, operand| {
param.* = try self.resolveRefId(operand.ref_id);
}
const result_id = self.spv.allocId();
try section.emit(self.spv.gpa, .OpTypeFunction, .{
.id_result = result_id,
.return_type = return_type,
.id_ref_2 = param_types,
});
break :blk result_id;
},
else => return self.todo("process type instruction {s}", .{@tagName(self.inst.opcode)}),
};
return AsmValue{ .ty = id };
}
/// Emit `self.inst` into `self.spv` and `self.func`, and return the AsmValue
/// that this produces (if any). This function processes common instructions:
/// - No forward references are allowed in operands.
/// - Target section is determined from instruction type.
/// - Function-local instructions are emitted in `self.func`.
fn processGenericInstruction(self: *Assembler) !?AsmValue {
const operands = self.inst.operands.items;
var maybe_spv_decl_index: ?SpvModule.Decl.Index = null;
const section = switch (self.inst.opcode.class()) {
.ConstantCreation => &self.spv.sections.types_globals_constants,
.Annotation => &self.spv.sections.annotations,
.TypeDeclaration => unreachable, // Handled elsewhere.
else => switch (self.inst.opcode) {
.OpEntryPoint => unreachable,
.OpExecutionMode, .OpExecutionModeId => &self.spv.sections.execution_modes,
.OpVariable => section: {
const storage_class: spec.StorageClass = @enumFromInt(operands[2].value);
if (storage_class == .Function) break :section &self.func.prologue;
maybe_spv_decl_index = try self.spv.allocDecl(.global);
if (self.spv.version.minor < 4 and storage_class != .Input and storage_class != .Output) {
// Before version 1.4, the interfaces storage classes are limited to the Input and Output
break :section &self.spv.sections.types_globals_constants;
}
try self.func.decl_deps.put(self.spv.gpa, maybe_spv_decl_index.?, {});
// TODO: In theory this can be non-empty if there is an initializer which depends on another global...
try self.spv.declareDeclDeps(maybe_spv_decl_index.?, &.{});
break :section &self.spv.sections.types_globals_constants;
},
// Default case - to be worked out further.
else => &self.func.body,
},
};
var maybe_result_id: ?IdResult = null;
const first_word = section.instructions.items.len;
// At this point we're not quite sure how many operands this instruction is going to have,
// so insert 0 and patch up the actual opcode word later.
try section.ensureUnusedCapacity(self.spv.gpa, 1);
section.writeWord(0);
for (operands) |operand| {
switch (operand) {
.value, .literal32 => |word| {
try section.ensureUnusedCapacity(self.spv.gpa, 1);
section.writeWord(word);
},
.literal64 => |dword| {
try section.ensureUnusedCapacity(self.spv.gpa, 2);
section.writeDoubleWord(dword);
},
.result_id => {
maybe_result_id = if (maybe_spv_decl_index) |spv_decl_index|
self.spv.declPtr(spv_decl_index).result_id
else
self.spv.allocId();
try section.ensureUnusedCapacity(self.spv.gpa, 1);
section.writeOperand(IdResult, maybe_result_id.?);
},
.ref_id => |index| {
const result = try self.resolveRef(index);
try section.ensureUnusedCapacity(self.spv.gpa, 1);
section.writeOperand(spec.IdRef, result.resultId());
},
.string => |offset| {
const text = std.mem.sliceTo(self.inst.string_bytes.items[offset..], 0);
const size = std.math.divCeil(usize, text.len + 1, @sizeOf(Word)) catch unreachable;
try section.ensureUnusedCapacity(self.spv.gpa, size);
section.writeOperand(spec.LiteralString, text);
},
}
}
const actual_word_count = section.instructions.items.len - first_word;
section.instructions.items[first_word] |= @as(u32, @as(u16, @intCast(actual_word_count))) << 16 | @intFromEnum(self.inst.opcode);
if (maybe_result_id) |result| {
return AsmValue{ .value = result };
}
return null;
}
/// Resolve a value reference. This function makes sure that the reference is
/// not self-referential, but it does allow the result to be forward declared.
fn resolveMaybeForwardRef(self: *Assembler, ref: AsmValue.Ref) !AsmValue {
const value = self.value_map.values()[ref];
switch (value) {
.just_declared => {
const name = self.value_map.keys()[ref];
// TODO: Improve source location.
return self.fail(0, "self-referential parameter %{s}", .{name});
},
else => return value,
}
}
/// Resolve a value reference. This function
/// makes sure that the result is not self-referential, nor that it is forward declared.
fn resolveRef(self: *Assembler, ref: AsmValue.Ref) !AsmValue {
const value = try self.resolveMaybeForwardRef(ref);
switch (value) {
.just_declared => unreachable,
.unresolved_forward_reference => {
const name = self.value_map.keys()[ref];
// TODO: Improve source location.
return self.fail(0, "reference to undeclared result-id %{s}", .{name});
},
else => return value,
}
}
fn resolveRefId(self: *Assembler, ref: AsmValue.Ref) !IdRef {
const value = try self.resolveRef(ref);
return value.resultId();
}
/// Attempt to parse an instruction into `self.inst`.
/// If this function returns `error.AssembleFail`, an explanatory
/// error message has been emitted into `self.errors`.
fn parseInstruction(self: *Assembler) !void {
self.inst.opcode = undefined;
self.inst.operands.clearRetainingCapacity();
self.inst.string_bytes.clearRetainingCapacity();
const lhs_result_tok = self.currentToken();
const maybe_lhs_result: ?AsmValue.Ref = if (self.eatToken(.result_id_assign)) blk: {
const name = self.tokenText(lhs_result_tok)[1..];
const entry = try self.value_map.getOrPut(self.gpa, name);
try self.expectToken(.equals);
if (!entry.found_existing) {
entry.value_ptr.* = .just_declared;
}
break :blk @intCast(entry.index);
} else null;
const opcode_tok = self.currentToken();
if (maybe_lhs_result != null) {
try self.expectToken(.opcode);
} else if (!self.eatToken(.opcode)) {
return self.fail(opcode_tok.start, "expected start of instruction, found {s}", .{opcode_tok.tag.name()});
}
const opcode_text = self.tokenText(opcode_tok);
const index = self.instruction_map.getIndex(opcode_text) orelse {
return self.fail(opcode_tok.start, "invalid opcode '{s}'", .{opcode_text});
};
const inst = spec.InstructionSet.core.instructions()[index];
self.inst.opcode = @enumFromInt(inst.opcode);
const expected_operands = inst.operands;
// This is a loop because the result-id is not always the first operand.
const requires_lhs_result = for (expected_operands) |op| {
if (op.kind == .IdResult) break true;
} else false;
if (requires_lhs_result and maybe_lhs_result == null) {
return self.fail(opcode_tok.start, "opcode '{s}' expects result on left-hand side", .{@tagName(self.inst.opcode)});
} else if (!requires_lhs_result and maybe_lhs_result != null) {
return self.fail(
lhs_result_tok.start,
"opcode '{s}' does not expect a result-id on the left-hand side",
.{@tagName(self.inst.opcode)},
);
}
for (expected_operands) |operand| {
if (operand.kind == .IdResult) {
try self.inst.operands.append(self.gpa, .{ .result_id = maybe_lhs_result.? });
continue;
}
switch (operand.quantifier) {
.required => if (self.isAtInstructionBoundary()) {
return self.fail(
self.currentToken().start,
"missing required operand", // TODO: Operand name?
.{},
);
} else {
try self.parseOperand(operand.kind);
},
.optional => if (!self.isAtInstructionBoundary()) {
try self.parseOperand(operand.kind);
},
.variadic => while (!self.isAtInstructionBoundary()) {
try self.parseOperand(operand.kind);
},
}
}
}
/// Parse a single operand of a particular type.
fn parseOperand(self: *Assembler, kind: spec.OperandKind) Error!void {
switch (kind.category()) {
.bit_enum => try self.parseBitEnum(kind),
.value_enum => try self.parseValueEnum(kind),
.id => try self.parseRefId(),
else => switch (kind) {
.LiteralInteger => try self.parseLiteralInteger(),
.LiteralString => try self.parseString(),
.LiteralContextDependentNumber => try self.parseContextDependentNumber(),
.LiteralExtInstInteger => try self.parseLiteralExtInstInteger(),
.PairIdRefIdRef => try self.parsePhiSource(),
else => return self.todo("parse operand of type {s}", .{@tagName(kind)}),
},
}
}
/// Also handles parsing any required extra operands.
fn parseBitEnum(self: *Assembler, kind: spec.OperandKind) !void {
var tok = self.currentToken();
try self.expectToken(.value);
var text = self.tokenText(tok);
if (std.mem.eql(u8, text, "None")) {
try self.inst.operands.append(self.gpa, .{ .value = 0 });
return;
}
const enumerants = kind.enumerants();
var mask: u32 = 0;
while (true) {
const enumerant = for (enumerants) |enumerant| {
if (std.mem.eql(u8, enumerant.name, text))
break enumerant;
} else {
return self.fail(tok.start, "'{s}' is not a valid flag for bitmask {s}", .{ text, @tagName(kind) });
};
mask |= enumerant.value;
if (!self.eatToken(.pipe))
break;
tok = self.currentToken();
try self.expectToken(.value);
text = self.tokenText(tok);
}
try self.inst.operands.append(self.gpa, .{ .value = mask });
// Assume values are sorted.
// TODO: ensure in generator.
for (enumerants) |enumerant| {
if ((mask & enumerant.value) == 0)
continue;
for (enumerant.parameters) |param_kind| {
if (self.isAtInstructionBoundary()) {
return self.fail(self.currentToken().start, "missing required parameter for bit flag '{s}'", .{enumerant.name});
}
try self.parseOperand(param_kind);
}
}
}
/// Also handles parsing any required extra operands.
fn parseValueEnum(self: *Assembler, kind: spec.OperandKind) !void {
const tok = self.currentToken();
if (self.eatToken(.placeholder)) {
const name = self.tokenText(tok)[1..];
const value = self.value_map.get(name) orelse {
return self.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try self.inst.operands.append(self.gpa, .{ .value = literal32 });
},
.string => |str| {
const enumerant = for (kind.enumerants()) |enumerant| {
if (std.mem.eql(u8, enumerant.name, str)) break enumerant;
} else {
return self.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ str, @tagName(kind) });
};
try self.inst.operands.append(self.gpa, .{ .value = enumerant.value });
},
else => return self.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
}
return;
}
try self.expectToken(.value);
const text = self.tokenText(tok);
const int_value = std.fmt.parseInt(u32, text, 0) catch null;
const enumerant = for (kind.enumerants()) |enumerant| {
if (int_value) |v| {
if (v == enumerant.value) break enumerant;
} else {
if (std.mem.eql(u8, enumerant.name, text)) break enumerant;
}
} else {
return self.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) });
};
try self.inst.operands.append(self.gpa, .{ .value = enumerant.value });
for (enumerant.parameters) |param_kind| {
if (self.isAtInstructionBoundary()) {
return self.fail(self.currentToken().start, "missing required parameter for enum variant '{s}'", .{enumerant.name});
}
try self.parseOperand(param_kind);
}
}
fn parseRefId(self: *Assembler) !void {
const tok = self.currentToken();
try self.expectToken(.result_id);
const name = self.tokenText(tok)[1..];
const entry = try self.value_map.getOrPut(self.gpa, name);
if (!entry.found_existing) {
entry.value_ptr.* = .unresolved_forward_reference;
}
const index: AsmValue.Ref = @intCast(entry.index);
try self.inst.operands.append(self.gpa, .{ .ref_id = index });
}
fn parseLiteralInteger(self: *Assembler) !void {
const tok = self.currentToken();
if (self.eatToken(.placeholder)) {
const name = self.tokenText(tok)[1..];
const value = self.value_map.get(name) orelse {
return self.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try self.inst.operands.append(self.gpa, .{ .literal32 = literal32 });
},
else => {
return self.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
},
}
return;
}
try self.expectToken(.value);
// According to the SPIR-V machine readable grammar, a LiteralInteger
// may consist of one or more words. From the SPIR-V docs it seems like there
// only one instruction where multiple words are allowed, the literals that make up the
// switch cases of OpSwitch. This case is handled separately, and so we just assume
// everything is a 32-bit integer in this function.
const text = self.tokenText(tok);
const value = std.fmt.parseInt(u32, text, 0) catch {
return self.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
};
try self.inst.operands.append(self.gpa, .{ .literal32 = value });
}
fn parseLiteralExtInstInteger(self: *Assembler) !void {
const tok = self.currentToken();
if (self.eatToken(.placeholder)) {
const name = self.tokenText(tok)[1..];
const value = self.value_map.get(name) orelse {
return self.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try self.inst.operands.append(self.gpa, .{ .literal32 = literal32 });
},
else => {
return self.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
},
}
return;
}
try self.expectToken(.value);
const text = self.tokenText(tok);
const value = std.fmt.parseInt(u32, text, 0) catch {
return self.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
};
try self.inst.operands.append(self.gpa, .{ .literal32 = value });
}
fn parseString(self: *Assembler) !void {
const tok = self.currentToken();
try self.expectToken(.string);
// Note, the string might not have a closing quote. In this case,
// an error is already emitted but we are trying to continue processing
// anyway, so in this function we have to deal with that situation.
const text = self.tokenText(tok);
assert(text.len > 0 and text[0] == '"');
const literal = if (text.len != 1 and text[text.len - 1] == '"')
text[1 .. text.len - 1]
else
text[1..];
const string_offset: u32 = @intCast(self.inst.string_bytes.items.len);
try self.inst.string_bytes.ensureUnusedCapacity(self.gpa, literal.len + 1);
self.inst.string_bytes.appendSliceAssumeCapacity(literal);
self.inst.string_bytes.appendAssumeCapacity(0);
try self.inst.operands.append(self.gpa, .{ .string = string_offset });
}
fn parseContextDependentNumber(self: *Assembler) !void {
// For context dependent numbers, the actual type to parse is determined by the instruction.
// Currently, this operand appears in OpConstant and OpSpecConstant, where the too-be-parsed type
// is determined by the result type. That means that in this instructions we have to resolve the
// operand type early and look at the result to see how we need to proceed.
assert(self.inst.opcode == .OpConstant or self.inst.opcode == .OpSpecConstant);
const tok = self.currentToken();
const result = try self.resolveRef(self.inst.operands.items[0].ref_id);
const result_id = result.resultId();
// We are going to cheat a little bit: The types we are interested in, int and float,
// are added to the module and cached via self.spv.intType and self.spv.floatType. Therefore,
// we can determine the width of these types by directly checking the cache.
// This only works if the Assembler and codegen both use spv.intType and spv.floatType though.
// We don't expect there to be many of these types, so just look it up every time.
// TODO: Count be improved to be a little bit more efficent.
{
var it = self.spv.cache.int_types.iterator();
while (it.next()) |entry| {
const id = entry.value_ptr.*;
if (id != result_id) continue;
const info = entry.key_ptr.*;
return try self.parseContextDependentInt(info.signedness, info.bits);
}
}
{
var it = self.spv.cache.float_types.iterator();
while (it.next()) |entry| {
const id = entry.value_ptr.*;
if (id != result_id) continue;
const info = entry.key_ptr.*;
switch (info.bits) {
16 => try self.parseContextDependentFloat(16),
32 => try self.parseContextDependentFloat(32),
64 => try self.parseContextDependentFloat(64),
else => return self.fail(tok.start, "cannot parse {}-bit info literal", .{info.bits}),
}
}
}
return self.fail(tok.start, "cannot parse literal constant", .{});
}
fn parseContextDependentInt(self: *Assembler, signedness: std.builtin.Signedness, width: u32) !void {
const tok = self.currentToken();
if (self.eatToken(.placeholder)) {
const name = self.tokenText(tok)[1..];
const value = self.value_map.get(name) orelse {
return self.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try self.inst.operands.append(self.gpa, .{ .literal32 = literal32 });
},
else => {
return self.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
},
}
return;
}
try self.expectToken(.value);
if (width == 0 or width > 2 * @bitSizeOf(spec.Word)) {
return self.fail(tok.start, "cannot parse {}-bit integer literal", .{width});
}
const text = self.tokenText(tok);
invalid: {
// Just parse the integer as the next larger integer type, and check if it overflows afterwards.
const int = std.fmt.parseInt(i128, text, 0) catch break :invalid;
const min = switch (signedness) {
.unsigned => 0,
.signed => -(@as(i128, 1) << (@as(u7, @intCast(width)) - 1)),
};
const max = (@as(i128, 1) << (@as(u7, @intCast(width)) - @intFromBool(signedness == .signed))) - 1;
if (int < min or int > max) {
break :invalid;
}
// Note, we store the sign-extended version here.
if (width <= @bitSizeOf(spec.Word)) {
try self.inst.operands.append(self.gpa, .{ .literal32 = @truncate(@as(u128, @bitCast(int))) });
} else {
try self.inst.operands.append(self.gpa, .{ .literal64 = @truncate(@as(u128, @bitCast(int))) });
}
return;
}
return self.fail(tok.start, "'{s}' is not a valid {s} {}-bit int literal", .{ text, @tagName(signedness), width });
}
fn parseContextDependentFloat(self: *Assembler, comptime width: u16) !void {
const Float = std.meta.Float(width);
const Int = std.meta.Int(.unsigned, width);
const tok = self.currentToken();
try self.expectToken(.value);
const text = self.tokenText(tok);
const value = std.fmt.parseFloat(Float, text) catch {
return self.fail(tok.start, "'{s}' is not a valid {}-bit float literal", .{ text, width });
};
const float_bits: Int = @bitCast(value);
if (width <= @bitSizeOf(spec.Word)) {
try self.inst.operands.append(self.gpa, .{ .literal32 = float_bits });
} else {
assert(width <= 2 * @bitSizeOf(spec.Word));
try self.inst.operands.append(self.gpa, .{ .literal64 = float_bits });
}
}
fn parsePhiSource(self: *Assembler) !void {
try self.parseRefId();
if (self.isAtInstructionBoundary()) {
return self.fail(self.currentToken().start, "missing phi block parent", .{});
}
try self.parseRefId();
}
/// Returns whether the `current_token` cursor is currently pointing
/// at the start of a new instruction.
fn isAtInstructionBoundary(self: Assembler) bool {
return switch (self.currentToken().tag) {
.opcode, .result_id_assign, .eof => true,
else => false,
};
}
fn expectToken(self: *Assembler, tag: Token.Tag) !void {
if (self.eatToken(tag))
return;
return self.fail(self.currentToken().start, "unexpected {s}, expected {s}", .{
self.currentToken().tag.name(),
tag.name(),
});
}
fn eatToken(self: *Assembler, tag: Token.Tag) bool {
if (self.testToken(tag)) {
self.current_token += 1;
return true;
}
return false;
}
fn testToken(self: Assembler, tag: Token.Tag) bool {
return self.currentToken().tag == tag;
}
fn currentToken(self: Assembler) Token {
return self.tokens.items[self.current_token];
}
fn tokenText(self: Assembler, tok: Token) []const u8 {
return self.src[tok.start..tok.end];
}
/// Tokenize `self.src` and put the tokens in `self.tokens`.
/// Any errors encountered are appended to `self.errors`.
fn tokenize(self: *Assembler) !void {
self.tokens.clearRetainingCapacity();
var offset: u32 = 0;
while (true) {
const tok = try self.nextToken(offset);
// Resolve result-id assignment now.
// Note: If the previous token wasn't a result-id, just ignore it,
// we will catch it while parsing.
if (tok.tag == .equals and self.tokens.items[self.tokens.items.len - 1].tag == .result_id) {
self.tokens.items[self.tokens.items.len - 1].tag = .result_id_assign;
}
try self.tokens.append(self.gpa, tok);
if (tok.tag == .eof)
break;
offset = tok.end;
}
}
/// Retrieve the next token from the input. This function will assert
/// that the token is surrounded by whitespace if required, but will not
/// interpret the token yet.
/// Note: This function doesn't handle .result_id_assign - this is handled in
/// tokenize().
fn nextToken(self: *Assembler, start_offset: u32) !Token {
// We generally separate the input into the following types:
// - Whitespace. Generally ignored, but also used as delimiter for some
// tokens.
// - Values. This entails integers, floats, enums - anything that
// consists of alphanumeric characters, delimited by whitespace.
// - Result-IDs. This entails anything that consists of alphanumeric characters and _, and
// starts with a %. In contrast to values, this entity can be checked for complete correctness
// relatively easily here.
// - Strings. This entails quote-delimited text such as "abc".
// SPIR-V strings have only two escapes, \" and \\.
// - Sigils, = and |. In this assembler, these are not required to have whitespace
// around them (they act as delimiters) as they do in SPIRV-Tools.
var state: enum {
start,
value,
result_id,
string,
string_end,
escape,
placeholder,
} = .start;
var token_start = start_offset;
var offset = start_offset;
var tag = Token.Tag.eof;
while (offset < self.src.len) : (offset += 1) {
const c = self.src[offset];
switch (state) {
.start => switch (c) {
' ', '\t', '\r', '\n' => token_start = offset + 1,
'"' => {
state = .string;
tag = .string;
},
'%' => {
state = .result_id;
tag = .result_id;
},
'|' => {
tag = .pipe;
offset += 1;
break;
},
'=' => {
tag = .equals;
offset += 1;
break;
},
'$' => {
state = .placeholder;
tag = .placeholder;
},
else => {
state = .value;
tag = .value;
},
},
.value => switch (c) {
'"' => {
try self.addError(offset, "unexpected string literal", .{});
// The user most likely just forgot a delimiter here - keep
// the tag as value.
break;
},
' ', '\t', '\r', '\n', '=', '|' => break,
else => {},
},
.result_id, .placeholder => switch (c) {
'_', 'a'...'z', 'A'...'Z', '0'...'9' => {},
' ', '\t', '\r', '\n', '=', '|' => break,
else => {
try self.addError(offset, "illegal character in result-id or placeholder", .{});
// Again, probably a forgotten delimiter here.
break;
},
},
.string => switch (c) {
'\\' => state = .escape,
'"' => state = .string_end,
else => {}, // Note, strings may include newlines
},
.string_end => switch (c) {
' ', '\t', '\r', '\n', '=', '|' => break,
else => {
try self.addError(offset, "unexpected character after string literal", .{});
// The token is still unmistakibly a string.
break;
},
},
// Escapes simply skip the next char.
.escape => state = .string,
}
}
var tok = Token{
.tag = tag,
.start = token_start,
.end = offset,
};
switch (state) {
.string, .escape => {
try self.addError(token_start, "unterminated string", .{});
},
.result_id => if (offset - token_start == 1) {
try self.addError(token_start, "result-id must have at least one name character", .{});
},
.value => {
const text = self.tokenText(tok);
const prefix = "Op";
const looks_like_opcode = text.len > prefix.len and
std.mem.startsWith(u8, text, prefix) and
std.ascii.isUpper(text[prefix.len]);
if (looks_like_opcode)
tok.tag = .opcode;
},
else => {},
}
return tok;
}