zig/src/codegen/spirv/Assembler.zig
2025-11-20 14:46:23 -08:00

1090 lines
40 KiB
Zig
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const CodeGen = @import("CodeGen.zig");
const Decl = @import("Module.zig").Decl;
const spec = @import("spec.zig");
const Opcode = spec.Opcode;
const Word = spec.Word;
const Id = spec.Id;
const StorageClass = spec.StorageClass;
const Assembler = @This();
cg: *CodeGen,
errors: std.ArrayList(ErrorMsg) = .empty,
src: []const u8 = undefined,
/// `ass.src` tokenized.
tokens: std.ArrayList(Token) = .empty,
current_token: u32 = 0,
/// The instruction that is currently being parsed or has just been parsed.
inst: struct {
opcode: Opcode = undefined,
operands: std.ArrayList(Operand) = .empty,
string_bytes: std.ArrayList(u8) = .empty,
fn result(ass: @This()) ?AsmValue.Ref {
for (ass.operands.items[0..@min(ass.operands.items.len, 2)]) |op| {
switch (op) {
.result_id => |index| return index,
else => {},
}
}
return null;
}
} = .{},
value_map: std.StringArrayHashMapUnmanaged(AsmValue) = .{},
inst_map: std.StringArrayHashMapUnmanaged(void) = .empty,
const Operand = union(enum) {
/// Any 'simple' 32-bit value. This could be a mask or
/// enumerant, etc, depending on the operands.
value: u32,
/// An int- or float literal encoded as 1 word.
literal32: u32,
/// An int- or float literal encoded as 2 words.
literal64: u64,
/// A result-id which is assigned to in this instruction.
/// If present, this is the first operand of the instruction.
result_id: AsmValue.Ref,
/// A result-id which referred to (not assigned to) in this instruction.
ref_id: AsmValue.Ref,
/// Offset into `inst.string_bytes`. The string ends at the next zero-terminator.
string: u32,
};
pub fn deinit(ass: *Assembler) void {
const gpa = ass.cg.module.gpa;
for (ass.errors.items) |err| gpa.free(err.msg);
ass.tokens.deinit(gpa);
ass.errors.deinit(gpa);
ass.inst.operands.deinit(gpa);
ass.inst.string_bytes.deinit(gpa);
ass.value_map.deinit(gpa);
ass.inst_map.deinit(gpa);
}
const Error = error{ AssembleFail, OutOfMemory };
pub fn assemble(ass: *Assembler, src: []const u8) Error!void {
const gpa = ass.cg.module.gpa;
ass.src = src;
ass.errors.clearRetainingCapacity();
// Populate the opcode map if it isn't already
if (ass.inst_map.count() == 0) {
const instructions = spec.InstructionSet.core.instructions();
try ass.inst_map.ensureUnusedCapacity(gpa, @intCast(instructions.len));
for (spec.InstructionSet.core.instructions(), 0..) |inst, i| {
const entry = try ass.inst_map.getOrPut(gpa, inst.name);
assert(entry.index == i);
}
}
try ass.tokenize();
while (!ass.testToken(.eof)) {
try ass.parseInstruction();
try ass.processInstruction();
}
if (ass.errors.items.len > 0) return error.AssembleFail;
}
const ErrorMsg = struct {
/// The offset in bytes from the start of `src` that this error occured.
byte_offset: u32,
msg: []const u8,
};
fn addError(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) !void {
const gpa = ass.cg.module.gpa;
const msg = try std.fmt.allocPrint(gpa, fmt, args);
errdefer gpa.free(msg);
try ass.errors.append(gpa, .{
.byte_offset = offset,
.msg = msg,
});
}
fn fail(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) Error {
try ass.addError(offset, fmt, args);
return error.AssembleFail;
}
fn todo(ass: *Assembler, comptime fmt: []const u8, args: anytype) Error {
return ass.fail(0, "todo: " ++ fmt, args);
}
const AsmValue = union(enum) {
/// The results are stored in an array hash map, and can be referred
/// to either by name (without the %), or by values of this index type.
pub const Ref = u32;
/// The RHS of the current instruction.
just_declared,
/// A placeholder for ref-ids of which the result-id is not yet known.
/// It will be further resolved at a later stage to a more concrete forward reference.
unresolved_forward_reference,
/// A normal result produced by a different instruction.
value: Id,
/// A type registered into the module's type system.
ty: Id,
/// A pre-supplied constant integer value.
constant: u32,
string: []const u8,
/// Retrieve the result-id of this AsmValue. Asserts that this AsmValue
/// is of a variant that allows the result to be obtained (not an unresolved
/// forward declaration, not in the process of being declared, etc).
pub fn resultId(value: AsmValue) Id {
return switch (value) {
.just_declared,
.unresolved_forward_reference,
// TODO: Lower this value as constant?
.constant,
.string,
=> unreachable,
.value => |result| result,
.ty => |result| result,
};
}
};
/// Attempt to process the instruction currently in `ass.inst`.
/// This for example emits the instruction in the module or function, or
/// records type definitions.
/// If this function returns `error.AssembleFail`, an explanatory
/// error message has already been emitted into `ass.errors`.
fn processInstruction(ass: *Assembler) !void {
const module = ass.cg.module;
const result: AsmValue = switch (ass.inst.opcode) {
.OpEntryPoint => {
return ass.fail(ass.currentToken().start, "cannot export entry points in assembly", .{});
},
.OpExecutionMode, .OpExecutionModeId => {
return ass.fail(ass.currentToken().start, "cannot set execution mode in assembly", .{});
},
.OpCapability => {
try module.addCapability(@enumFromInt(ass.inst.operands.items[0].value));
return;
},
.OpExtension => {
const ext_name_offset = ass.inst.operands.items[0].string;
const ext_name = std.mem.sliceTo(ass.inst.string_bytes.items[ext_name_offset..], 0);
try module.addExtension(ext_name);
return;
},
.OpExtInstImport => blk: {
const set_name_offset = ass.inst.operands.items[1].string;
const set_name = std.mem.sliceTo(ass.inst.string_bytes.items[set_name_offset..], 0);
const set_tag = std.meta.stringToEnum(spec.InstructionSet, set_name) orelse {
return ass.fail(set_name_offset, "unknown instruction set: {s}", .{set_name});
};
break :blk .{ .value = try module.importInstructionSet(set_tag) };
},
else => switch (ass.inst.opcode.class()) {
.type_declaration => try ass.processTypeInstruction(),
else => (try ass.processGenericInstruction()) orelse return,
},
};
const result_ref = ass.inst.result().?;
switch (ass.value_map.values()[result_ref]) {
.just_declared => ass.value_map.values()[result_ref] = result,
else => {
// TODO: Improve source location.
const name = ass.value_map.keys()[result_ref];
return ass.fail(0, "duplicate definition of %{s}", .{name});
},
}
}
fn processTypeInstruction(ass: *Assembler) !AsmValue {
const cg = ass.cg;
const gpa = cg.module.gpa;
const module = cg.module;
const operands = ass.inst.operands.items;
const section = &module.sections.globals;
const id = switch (ass.inst.opcode) {
.OpTypeVoid => try module.voidType(),
.OpTypeBool => try module.boolType(),
.OpTypeInt => blk: {
const signedness: std.builtin.Signedness = switch (operands[2].literal32) {
0 => .unsigned,
1 => .signed,
else => {
// TODO: Improve source location.
return ass.fail(0, "{} is not a valid signedness (expected 0 or 1)", .{operands[2].literal32});
},
};
const width = std.math.cast(u16, operands[1].literal32) orelse {
return ass.fail(0, "int type of {} bits is too large", .{operands[1].literal32});
};
break :blk try module.intType(signedness, width);
},
.OpTypeFloat => blk: {
const bits = operands[1].literal32;
switch (bits) {
16, 32, 64 => {},
else => {
return ass.fail(0, "{} is not a valid bit count for floats (expected 16, 32 or 64)", .{bits});
},
}
break :blk try module.floatType(@intCast(bits));
},
.OpTypeVector => blk: {
const child_type = try ass.resolveRefId(operands[1].ref_id);
break :blk try module.vectorType(operands[2].literal32, child_type);
},
.OpTypeArray => {
// TODO: The length of an OpTypeArray is determined by a constant (which may be a spec constant),
// and so some consideration must be taken when entering this in the type system.
return ass.todo("process OpTypeArray", .{});
},
.OpTypeRuntimeArray => blk: {
const element_type = try ass.resolveRefId(operands[1].ref_id);
const result_id = module.allocId();
try section.emit(module.gpa, .OpTypeRuntimeArray, .{
.id_result = result_id,
.element_type = element_type,
});
break :blk result_id;
},
.OpTypePointer => blk: {
const storage_class: StorageClass = @enumFromInt(operands[1].value);
const child_type = try ass.resolveRefId(operands[2].ref_id);
const result_id = module.allocId();
try section.emit(module.gpa, .OpTypePointer, .{
.id_result = result_id,
.storage_class = storage_class,
.type = child_type,
});
break :blk result_id;
},
.OpTypeStruct => blk: {
const scratch_top = cg.id_scratch.items.len;
defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);
const ids = try cg.id_scratch.addManyAsSlice(gpa, operands[1..].len);
for (operands[1..], ids) |op, *id| id.* = try ass.resolveRefId(op.ref_id);
break :blk try module.structType(ids, null, null, .none);
},
.OpTypeImage => blk: {
const sampled_type = try ass.resolveRefId(operands[1].ref_id);
const result_id = module.allocId();
try section.emit(gpa, .OpTypeImage, .{
.id_result = result_id,
.sampled_type = sampled_type,
.dim = @enumFromInt(operands[2].value),
.depth = operands[3].literal32,
.arrayed = operands[4].literal32,
.ms = operands[5].literal32,
.sampled = operands[6].literal32,
.image_format = @enumFromInt(operands[7].value),
});
break :blk result_id;
},
.OpTypeSampler => blk: {
const result_id = module.allocId();
try section.emit(gpa, .OpTypeSampler, .{ .id_result = result_id });
break :blk result_id;
},
.OpTypeSampledImage => blk: {
const image_type = try ass.resolveRefId(operands[1].ref_id);
const result_id = module.allocId();
try section.emit(gpa, .OpTypeSampledImage, .{ .id_result = result_id, .image_type = image_type });
break :blk result_id;
},
.OpTypeFunction => blk: {
const param_operands = operands[2..];
const return_type = try ass.resolveRefId(operands[1].ref_id);
const scratch_top = cg.id_scratch.items.len;
defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);
const param_types = try cg.id_scratch.addManyAsSlice(gpa, param_operands.len);
for (param_types, param_operands) |*param, operand| {
param.* = try ass.resolveRefId(operand.ref_id);
}
const result_id = module.allocId();
try section.emit(module.gpa, .OpTypeFunction, .{
.id_result = result_id,
.return_type = return_type,
.id_ref_2 = param_types,
});
break :blk result_id;
},
else => return ass.todo("process type instruction {s}", .{@tagName(ass.inst.opcode)}),
};
return .{ .ty = id };
}
/// - No forward references are allowed in operands.
/// - Target section is determined from instruction type.
fn processGenericInstruction(ass: *Assembler) !?AsmValue {
const module = ass.cg.module;
const target = module.zcu.getTarget();
const operands = ass.inst.operands.items;
var maybe_spv_decl_index: ?Decl.Index = null;
const section = switch (ass.inst.opcode.class()) {
.constant_creation => &module.sections.globals,
.annotation => &module.sections.annotations,
.type_declaration => unreachable, // Handled elsewhere.
else => switch (ass.inst.opcode) {
.OpEntryPoint => unreachable,
.OpExecutionMode, .OpExecutionModeId => &module.sections.execution_modes,
.OpVariable => section: {
const storage_class: spec.StorageClass = @enumFromInt(operands[2].value);
if (storage_class == .function) break :section &ass.cg.prologue;
maybe_spv_decl_index = try module.allocDecl(.global);
if (!target.cpu.has(.spirv, .v1_4) and storage_class != .input and storage_class != .output) {
// Before version 1.4, the interfaces storage classes are limited to the Input and Output
break :section &module.sections.globals;
}
try ass.cg.module.decl_deps.append(module.gpa, maybe_spv_decl_index.?);
break :section &module.sections.globals;
},
else => &ass.cg.body,
},
};
var maybe_result_id: ?Id = null;
const first_word = section.instructions.items.len;
// At this point we're not quite sure how many operands this instruction is
// going to have, so insert 0 and patch up the actual opcode word later.
try section.ensureUnusedCapacity(module.gpa, 1);
section.writeWord(0);
for (operands) |operand| {
switch (operand) {
.value, .literal32 => |word| {
try section.ensureUnusedCapacity(module.gpa, 1);
section.writeWord(word);
},
.literal64 => |dword| {
try section.ensureUnusedCapacity(module.gpa, 2);
section.writeDoubleWord(dword);
},
.result_id => {
maybe_result_id = if (maybe_spv_decl_index) |spv_decl_index|
module.declPtr(spv_decl_index).result_id
else
module.allocId();
try section.ensureUnusedCapacity(module.gpa, 1);
section.writeOperand(Id, maybe_result_id.?);
},
.ref_id => |index| {
const result = try ass.resolveRef(index);
try section.ensureUnusedCapacity(module.gpa, 1);
section.writeOperand(spec.Id, result.resultId());
},
.string => |offset| {
const text = std.mem.sliceTo(ass.inst.string_bytes.items[offset..], 0);
const size = std.math.divCeil(usize, text.len + 1, @sizeOf(Word)) catch unreachable;
try section.ensureUnusedCapacity(module.gpa, size);
section.writeOperand(spec.LiteralString, text);
},
}
}
const actual_word_count = section.instructions.items.len - first_word;
section.instructions.items[first_word] |= @as(u32, @as(u16, @intCast(actual_word_count))) << 16 | @intFromEnum(ass.inst.opcode);
if (maybe_result_id) |result| return .{ .value = result };
return null;
}
fn resolveMaybeForwardRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
const value = ass.value_map.values()[ref];
switch (value) {
.just_declared => {
const name = ass.value_map.keys()[ref];
// TODO: Improve source location.
return ass.fail(0, "ass-referential parameter %{s}", .{name});
},
else => return value,
}
}
fn resolveRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
const value = try ass.resolveMaybeForwardRef(ref);
switch (value) {
.just_declared => unreachable,
.unresolved_forward_reference => {
const name = ass.value_map.keys()[ref];
// TODO: Improve source location.
return ass.fail(0, "reference to undeclared result-id %{s}", .{name});
},
else => return value,
}
}
fn resolveRefId(ass: *Assembler, ref: AsmValue.Ref) !Id {
const value = try ass.resolveRef(ref);
return value.resultId();
}
fn parseInstruction(ass: *Assembler) !void {
const gpa = ass.cg.module.gpa;
ass.inst.opcode = undefined;
ass.inst.operands.clearRetainingCapacity();
ass.inst.string_bytes.clearRetainingCapacity();
const lhs_result_tok = ass.currentToken();
const maybe_lhs_result: ?AsmValue.Ref = if (ass.eatToken(.result_id_assign)) blk: {
const name = ass.tokenText(lhs_result_tok)[1..];
const entry = try ass.value_map.getOrPut(gpa, name);
try ass.expectToken(.equals);
if (!entry.found_existing) {
entry.value_ptr.* = .just_declared;
}
break :blk @intCast(entry.index);
} else null;
const opcode_tok = ass.currentToken();
if (maybe_lhs_result != null) {
try ass.expectToken(.opcode);
} else if (!ass.eatToken(.opcode)) {
return ass.fail(opcode_tok.start, "expected start of instruction, found {s}", .{opcode_tok.tag.name()});
}
const opcode_text = ass.tokenText(opcode_tok);
const index = ass.inst_map.getIndex(opcode_text) orelse {
return ass.fail(opcode_tok.start, "invalid opcode '{s}'", .{opcode_text});
};
const inst = spec.InstructionSet.core.instructions()[index];
ass.inst.opcode = @enumFromInt(inst.opcode);
const expected_operands = inst.operands;
// This is a loop because the result-id is not always the first operand.
const requires_lhs_result = for (expected_operands) |op| {
if (op.kind == .id_result) break true;
} else false;
if (requires_lhs_result and maybe_lhs_result == null) {
return ass.fail(opcode_tok.start, "opcode '{s}' expects result on left-hand side", .{@tagName(ass.inst.opcode)});
} else if (!requires_lhs_result and maybe_lhs_result != null) {
return ass.fail(
lhs_result_tok.start,
"opcode '{s}' does not expect a result-id on the left-hand side",
.{@tagName(ass.inst.opcode)},
);
}
for (expected_operands) |operand| {
if (operand.kind == .id_result) {
try ass.inst.operands.append(gpa, .{ .result_id = maybe_lhs_result.? });
continue;
}
switch (operand.quantifier) {
.required => if (ass.isAtInstructionBoundary()) {
return ass.fail(
ass.currentToken().start,
"missing required operand", // TODO: Operand name?
.{},
);
} else {
try ass.parseOperand(operand.kind);
},
.optional => if (!ass.isAtInstructionBoundary()) {
try ass.parseOperand(operand.kind);
},
.variadic => while (!ass.isAtInstructionBoundary()) {
try ass.parseOperand(operand.kind);
},
}
}
}
fn parseOperand(ass: *Assembler, kind: spec.OperandKind) Error!void {
switch (kind.category()) {
.bit_enum => try ass.parseBitEnum(kind),
.value_enum => try ass.parseValueEnum(kind),
.id => try ass.parseRefId(),
else => switch (kind) {
.literal_integer => try ass.parseLiteralInteger(),
.literal_string => try ass.parseString(),
.literal_context_dependent_number => try ass.parseContextDependentNumber(),
.literal_ext_inst_integer => try ass.parseLiteralExtInstInteger(),
.pair_id_ref_id_ref => try ass.parsePhiSource(),
else => return ass.todo("parse operand of type {s}", .{@tagName(kind)}),
},
}
}
/// Also handles parsing any required extra operands.
fn parseBitEnum(ass: *Assembler, kind: spec.OperandKind) !void {
const gpa = ass.cg.module.gpa;
var tok = ass.currentToken();
try ass.expectToken(.value);
var text = ass.tokenText(tok);
if (std.mem.eql(u8, text, "None")) {
try ass.inst.operands.append(gpa, .{ .value = 0 });
return;
}
const enumerants = kind.enumerants();
var mask: u32 = 0;
while (true) {
const enumerant = for (enumerants) |enumerant| {
if (std.mem.eql(u8, enumerant.name, text))
break enumerant;
} else {
return ass.fail(tok.start, "'{s}' is not a valid flag for bitmask {s}", .{ text, @tagName(kind) });
};
mask |= enumerant.value;
if (!ass.eatToken(.pipe))
break;
tok = ass.currentToken();
try ass.expectToken(.value);
text = ass.tokenText(tok);
}
try ass.inst.operands.append(gpa, .{ .value = mask });
// Assume values are sorted.
// TODO: ensure in generator.
for (enumerants) |enumerant| {
if ((mask & enumerant.value) == 0)
continue;
for (enumerant.parameters) |param_kind| {
if (ass.isAtInstructionBoundary()) {
return ass.fail(ass.currentToken().start, "missing required parameter for bit flag '{s}'", .{enumerant.name});
}
try ass.parseOperand(param_kind);
}
}
}
/// Also handles parsing any required extra operands.
fn parseValueEnum(ass: *Assembler, kind: spec.OperandKind) !void {
const gpa = ass.cg.module.gpa;
const tok = ass.currentToken();
if (ass.eatToken(.placeholder)) {
const name = ass.tokenText(tok)[1..];
const value = ass.value_map.get(name) orelse {
return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try ass.inst.operands.append(gpa, .{ .value = literal32 });
},
.string => |str| {
const enumerant = for (kind.enumerants()) |enumerant| {
if (std.mem.eql(u8, enumerant.name, str)) break enumerant;
} else {
return ass.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ str, @tagName(kind) });
};
try ass.inst.operands.append(gpa, .{ .value = enumerant.value });
},
else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
}
return;
}
try ass.expectToken(.value);
const text = ass.tokenText(tok);
const int_value = std.fmt.parseInt(u32, text, 0) catch null;
const enumerant = for (kind.enumerants()) |enumerant| {
if (int_value) |v| {
if (v == enumerant.value) break enumerant;
} else {
if (std.mem.eql(u8, enumerant.name, text)) break enumerant;
}
} else {
return ass.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) });
};
try ass.inst.operands.append(gpa, .{ .value = enumerant.value });
for (enumerant.parameters) |param_kind| {
if (ass.isAtInstructionBoundary()) {
return ass.fail(ass.currentToken().start, "missing required parameter for enum variant '{s}'", .{enumerant.name});
}
try ass.parseOperand(param_kind);
}
}
fn parseRefId(ass: *Assembler) !void {
const gpa = ass.cg.module.gpa;
const tok = ass.currentToken();
try ass.expectToken(.result_id);
const name = ass.tokenText(tok)[1..];
const entry = try ass.value_map.getOrPut(gpa, name);
if (!entry.found_existing) {
entry.value_ptr.* = .unresolved_forward_reference;
}
const index: AsmValue.Ref = @intCast(entry.index);
try ass.inst.operands.append(gpa, .{ .ref_id = index });
}
fn parseLiteralInteger(ass: *Assembler) !void {
const gpa = ass.cg.module.gpa;
const tok = ass.currentToken();
if (ass.eatToken(.placeholder)) {
const name = ass.tokenText(tok)[1..];
const value = ass.value_map.get(name) orelse {
return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try ass.inst.operands.append(gpa, .{ .literal32 = literal32 });
},
else => {
return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
},
}
return;
}
try ass.expectToken(.value);
// According to the SPIR-V machine readable grammar, a LiteralInteger
// may consist of one or more words. From the SPIR-V docs it seems like there
// only one instruction where multiple words are allowed, the literals that make up the
// switch cases of OpSwitch. This case is handled separately, and so we just assume
// everything is a 32-bit integer in this function.
const text = ass.tokenText(tok);
const value = std.fmt.parseInt(u32, text, 0) catch {
return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
};
try ass.inst.operands.append(gpa, .{ .literal32 = value });
}
fn parseLiteralExtInstInteger(ass: *Assembler) !void {
const gpa = ass.cg.module.gpa;
const tok = ass.currentToken();
if (ass.eatToken(.placeholder)) {
const name = ass.tokenText(tok)[1..];
const value = ass.value_map.get(name) orelse {
return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try ass.inst.operands.append(gpa, .{ .literal32 = literal32 });
},
else => {
return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
},
}
return;
}
try ass.expectToken(.value);
const text = ass.tokenText(tok);
const value = std.fmt.parseInt(u32, text, 0) catch {
return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
};
try ass.inst.operands.append(gpa, .{ .literal32 = value });
}
fn parseString(ass: *Assembler) !void {
const gpa = ass.cg.module.gpa;
const tok = ass.currentToken();
try ass.expectToken(.string);
// Note, the string might not have a closing quote. In this case,
// an error is already emitted but we are trying to continue processing
// anyway, so in this function we have to deal with that situation.
const text = ass.tokenText(tok);
assert(text.len > 0 and text[0] == '"');
const literal = if (text.len != 1 and text[text.len - 1] == '"')
text[1 .. text.len - 1]
else
text[1..];
const string_offset: u32 = @intCast(ass.inst.string_bytes.items.len);
try ass.inst.string_bytes.ensureUnusedCapacity(gpa, literal.len + 1);
ass.inst.string_bytes.appendSliceAssumeCapacity(literal);
ass.inst.string_bytes.appendAssumeCapacity(0);
try ass.inst.operands.append(gpa, .{ .string = string_offset });
}
fn parseContextDependentNumber(ass: *Assembler) !void {
const module = ass.cg.module;
// For context dependent numbers, the actual type to parse is determined by the instruction.
// Currently, this operand appears in OpConstant and OpSpecConstant, where the too-be-parsed type
// is determined by the result type. That means that in this instructions we have to resolve the
// operand type early and look at the result to see how we need to proceed.
assert(ass.inst.opcode == .OpConstant or ass.inst.opcode == .OpSpecConstant);
const tok = ass.currentToken();
const result = try ass.resolveRef(ass.inst.operands.items[0].ref_id);
const result_id = result.resultId();
// We are going to cheat a little bit: The types we are interested in, int and float,
// are added to the module and cached via module.intType and module.floatType. Therefore,
// we can determine the width of these types by directly checking the cache.
// This only works if the Assembler and codegen both use spv.intType and spv.floatType though.
// We don't expect there to be many of these types, so just look it up every time.
// TODO: Count be improved to be a little bit more efficent.
{
var it = module.cache.int_types.iterator();
while (it.next()) |entry| {
const id = entry.value_ptr.*;
if (id != result_id) continue;
const info = entry.key_ptr.*;
return try ass.parseContextDependentInt(info.signedness, info.bits);
}
}
{
var it = module.cache.float_types.iterator();
while (it.next()) |entry| {
const id = entry.value_ptr.*;
if (id != result_id) continue;
const info = entry.key_ptr.*;
switch (info.bits) {
16 => try ass.parseContextDependentFloat(16),
32 => try ass.parseContextDependentFloat(32),
64 => try ass.parseContextDependentFloat(64),
else => return ass.fail(tok.start, "cannot parse {}-bit info literal", .{info.bits}),
}
}
}
return ass.fail(tok.start, "cannot parse literal constant", .{});
}
fn parseContextDependentInt(ass: *Assembler, signedness: std.builtin.Signedness, width: u32) !void {
const gpa = ass.cg.module.gpa;
const tok = ass.currentToken();
if (ass.eatToken(.placeholder)) {
const name = ass.tokenText(tok)[1..];
const value = ass.value_map.get(name) orelse {
return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
};
switch (value) {
.constant => |literal32| {
try ass.inst.operands.append(gpa, .{ .literal32 = literal32 });
},
else => {
return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name});
},
}
return;
}
try ass.expectToken(.value);
if (width == 0 or width > 2 * @bitSizeOf(spec.Word)) {
return ass.fail(tok.start, "cannot parse {}-bit integer literal", .{width});
}
const text = ass.tokenText(tok);
invalid: {
// Just parse the integer as the next larger integer type, and check if it overflows afterwards.
const int = std.fmt.parseInt(i128, text, 0) catch break :invalid;
const min = switch (signedness) {
.unsigned => 0,
.signed => -(@as(i128, 1) << (@as(u7, @intCast(width)) - 1)),
};
const max = (@as(i128, 1) << (@as(u7, @intCast(width)) - @intFromBool(signedness == .signed))) - 1;
if (int < min or int > max) {
break :invalid;
}
// Note, we store the sign-extended version here.
if (width <= @bitSizeOf(spec.Word)) {
try ass.inst.operands.append(gpa, .{ .literal32 = @truncate(@as(u128, @bitCast(int))) });
} else {
try ass.inst.operands.append(gpa, .{ .literal64 = @truncate(@as(u128, @bitCast(int))) });
}
return;
}
return ass.fail(tok.start, "'{s}' is not a valid {s} {}-bit int literal", .{ text, @tagName(signedness), width });
}
fn parseContextDependentFloat(ass: *Assembler, comptime width: u16) !void {
const gpa = ass.cg.module.gpa;
const Float = std.meta.Float(width);
const Int = std.meta.Int(.unsigned, width);
const tok = ass.currentToken();
try ass.expectToken(.value);
const text = ass.tokenText(tok);
const value = std.fmt.parseFloat(Float, text) catch {
return ass.fail(tok.start, "'{s}' is not a valid {}-bit float literal", .{ text, width });
};
const float_bits: Int = @bitCast(value);
if (width <= @bitSizeOf(spec.Word)) {
try ass.inst.operands.append(gpa, .{ .literal32 = float_bits });
} else {
assert(width <= 2 * @bitSizeOf(spec.Word));
try ass.inst.operands.append(gpa, .{ .literal64 = float_bits });
}
}
fn parsePhiSource(ass: *Assembler) !void {
try ass.parseRefId();
if (ass.isAtInstructionBoundary()) {
return ass.fail(ass.currentToken().start, "missing phi block parent", .{});
}
try ass.parseRefId();
}
/// Returns whether the `current_token` cursor
/// is currently pointing at the start of a new instruction.
fn isAtInstructionBoundary(ass: Assembler) bool {
return switch (ass.currentToken().tag) {
.opcode, .result_id_assign, .eof => true,
else => false,
};
}
fn expectToken(ass: *Assembler, tag: Token.Tag) !void {
if (ass.eatToken(tag))
return;
return ass.fail(ass.currentToken().start, "unexpected {s}, expected {s}", .{
ass.currentToken().tag.name(),
tag.name(),
});
}
fn eatToken(ass: *Assembler, tag: Token.Tag) bool {
if (ass.testToken(tag)) {
ass.current_token += 1;
return true;
}
return false;
}
fn testToken(ass: Assembler, tag: Token.Tag) bool {
return ass.currentToken().tag == tag;
}
fn currentToken(ass: Assembler) Token {
return ass.tokens.items[ass.current_token];
}
fn tokenText(ass: Assembler, tok: Token) []const u8 {
return ass.src[tok.start..tok.end];
}
/// Tokenize `ass.src` and put the tokens in `ass.tokens`.
/// Any errors encountered are appended to `ass.errors`.
fn tokenize(ass: *Assembler) !void {
const gpa = ass.cg.module.gpa;
ass.tokens.clearRetainingCapacity();
var offset: u32 = 0;
while (true) {
const tok = try ass.nextToken(offset);
// Resolve result-id assignment now.
// NOTE: If the previous token wasn't a result-id, just ignore it,
// we will catch it while parsing.
if (tok.tag == .equals and ass.tokens.items[ass.tokens.items.len - 1].tag == .result_id) {
ass.tokens.items[ass.tokens.items.len - 1].tag = .result_id_assign;
}
try ass.tokens.append(gpa, tok);
if (tok.tag == .eof)
break;
offset = tok.end;
}
}
const Token = struct {
tag: Tag,
start: u32,
end: u32,
const Tag = enum {
/// Returned when there was no more input to match.
eof,
/// %identifier
result_id,
/// %identifier when appearing on the LHS of an equals sign.
/// While not technically a token, its relatively easy to resolve
/// this during lexical analysis and relieves a bunch of headaches
/// during parsing.
result_id_assign,
/// Mask, int, or float. These are grouped together as some
/// SPIR-V enumerants look a bit like integers as well (for example
/// "3D"), and so it is easier to just interpret them as the expected
/// type when resolving an instruction's operands.
value,
/// An enumerant that looks like an opcode, that is, OpXxxx.
/// Not necessarily a *valid* opcode.
opcode,
/// String literals.
/// Note, this token is also returned for unterminated
/// strings. In this case the closing " is not present.
string,
/// |.
pipe,
/// =.
equals,
/// $identifier. This is used (for now) for constant values, like integers.
/// These can be used in place of a normal `value`.
placeholder,
fn name(tag: Tag) []const u8 {
return switch (tag) {
.eof => "<end of input>",
.result_id => "<result-id>",
.result_id_assign => "<assigned result-id>",
.value => "<value>",
.opcode => "<opcode>",
.string => "<string literal>",
.pipe => "'|'",
.equals => "'='",
.placeholder => "<placeholder>",
};
}
};
};
/// Retrieve the next token from the input. This function will assert
/// that the token is surrounded by whitespace if required, but will not
/// interpret the token yet.
/// NOTE: This function doesn't handle .result_id_assign - this is handled in tokenize().
fn nextToken(ass: *Assembler, start_offset: u32) !Token {
// We generally separate the input into the following types:
// - Whitespace. Generally ignored, but also used as delimiter for some
// tokens.
// - Values. This entails integers, floats, enums - anything that
// consists of alphanumeric characters, delimited by whitespace.
// - Result-IDs. This entails anything that consists of alphanumeric characters and _, and
// starts with a %. In contrast to values, this entity can be checked for complete correctness
// relatively easily here.
// - Strings. This entails quote-delimited text such as "abc".
// SPIR-V strings have only two escapes, \" and \\.
// - Sigils, = and |. In this assembler, these are not required to have whitespace
// around them (they act as delimiters) as they do in SPIRV-Tools.
var state: enum {
start,
value,
result_id,
string,
string_end,
escape,
placeholder,
} = .start;
var token_start = start_offset;
var offset = start_offset;
var tag = Token.Tag.eof;
while (offset < ass.src.len) : (offset += 1) {
const c = ass.src[offset];
switch (state) {
.start => switch (c) {
' ', '\t', '\r', '\n' => token_start = offset + 1,
'"' => {
state = .string;
tag = .string;
},
'%' => {
state = .result_id;
tag = .result_id;
},
'|' => {
tag = .pipe;
offset += 1;
break;
},
'=' => {
tag = .equals;
offset += 1;
break;
},
'$' => {
state = .placeholder;
tag = .placeholder;
},
else => {
state = .value;
tag = .value;
},
},
.value => switch (c) {
'"' => {
try ass.addError(offset, "unexpected string literal", .{});
// The user most likely just forgot a delimiter here - keep
// the tag as value.
break;
},
' ', '\t', '\r', '\n', '=', '|' => break,
else => {},
},
.result_id, .placeholder => switch (c) {
'_', 'a'...'z', 'A'...'Z', '0'...'9' => {},
' ', '\t', '\r', '\n', '=', '|' => break,
else => {
try ass.addError(offset, "illegal character in result-id or placeholder", .{});
// Again, probably a forgotten delimiter here.
break;
},
},
.string => switch (c) {
'\\' => state = .escape,
'"' => state = .string_end,
else => {}, // Note, strings may include newlines
},
.string_end => switch (c) {
' ', '\t', '\r', '\n', '=', '|' => break,
else => {
try ass.addError(offset, "unexpected character after string literal", .{});
// The token is still unmistakibly a string.
break;
},
},
// Escapes simply skip the next char.
.escape => state = .string,
}
}
var tok: Token = .{
.tag = tag,
.start = token_start,
.end = offset,
};
switch (state) {
.string, .escape => {
try ass.addError(token_start, "unterminated string", .{});
},
.result_id => if (offset - token_start == 1) {
try ass.addError(token_start, "result-id must have at least one name character", .{});
},
.value => {
const text = ass.tokenText(tok);
const prefix = "Op";
const looks_like_opcode = text.len > prefix.len and
std.mem.startsWith(u8, text, prefix) and
std.ascii.isUpper(text[prefix.len]);
if (looks_like_opcode)
tok.tag = .opcode;
},
else => {},
}
return tok;
}