mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 05:44:20 +00:00
1090 lines
40 KiB
Zig
1090 lines
40 KiB
Zig
const std = @import("std");
|
||
const Allocator = std.mem.Allocator;
|
||
const assert = std.debug.assert;
|
||
|
||
const CodeGen = @import("CodeGen.zig");
|
||
const Decl = @import("Module.zig").Decl;
|
||
|
||
const spec = @import("spec.zig");
|
||
const Opcode = spec.Opcode;
|
||
const Word = spec.Word;
|
||
const Id = spec.Id;
|
||
const StorageClass = spec.StorageClass;
|
||
|
||
const Assembler = @This();
|
||
|
||
/// Code generator that owns the module (and its allocator), the sections and
/// the scratch buffers this assembler writes to.
cg: *CodeGen,
/// Diagnostics collected so far. Each `msg` is allocated with the module's gpa
/// and released in `deinit`.
errors: std.ArrayList(ErrorMsg) = .empty,
/// The text that is being assembled. Assigned by `assemble`.
src: []const u8 = undefined,
/// `ass.src` tokenized.
tokens: std.ArrayList(Token) = .empty,
/// Index into `tokens` of the token the parser is currently looking at.
current_token: u32 = 0,
/// The instruction that is currently being parsed or has just been parsed.
inst: struct {
    opcode: Opcode = undefined,
    operands: std.ArrayList(Operand) = .empty,
    /// Backing storage for `Operand.string`; strings are zero-terminated.
    string_bytes: std.ArrayList(u8) = .empty,

    /// Returns the reference held by a `result_id` operand, or null when there
    /// is none. Only the first two operands are inspected.
    fn result(ass: @This()) ?AsmValue.Ref {
        const head = ass.operands.items[0..@min(ass.operands.items.len, 2)];
        for (head) |op| {
            switch (op) {
                .result_id => |index| return index,
                else => {},
            }
        }
        return null;
    }
} = .{},
/// Values known to the assembler, keyed by name without the leading sigil.
/// The position of an entry in this map is its `AsmValue.Ref`.
value_map: std.StringArrayHashMapUnmanaged(AsmValue) = .empty,
/// Names of the core instruction set. `assemble` fills this so that the index
/// of a name equals its index in `spec.InstructionSet.core.instructions()`.
inst_map: std.StringArrayHashMapUnmanaged(void) = .empty,
|
||
|
||
/// One parsed operand of the instruction held in `ass.inst`.
const Operand = union(enum) {
    /// Any 'simple' 32-bit value: a mask, an enumerant, etc,
    /// depending on the operand kind that was parsed.
    value: u32,
    /// An int- or float literal that is encoded as 1 word.
    literal32: u32,
    /// An int- or float literal that is encoded as 2 words.
    literal64: u64,
    /// A result-id which is assigned to in this instruction.
    /// If present, this is the first operand of the instruction.
    result_id: AsmValue.Ref,
    /// A result-id which is referred to (not assigned to) in this instruction.
    ref_id: AsmValue.Ref,
    /// Offset into `inst.string_bytes`. The string ends at the next zero-terminator.
    string: u32,
};
|
||
|
||
/// Releases every allocation owned by the assembler: the error messages and
/// their list, the token list, the per-instruction buffers and both maps.
/// Keys of `value_map` and `inst_map` are not freed here.
pub fn deinit(ass: *Assembler) void {
    const allocator = ass.cg.module.gpa;

    // Each message was allocated separately, so release them before the list.
    for (ass.errors.items) |entry| {
        allocator.free(entry.msg);
    }
    ass.errors.deinit(allocator);

    ass.tokens.deinit(allocator);
    ass.inst.string_bytes.deinit(allocator);
    ass.inst.operands.deinit(allocator);
    ass.inst_map.deinit(allocator);
    ass.value_map.deinit(allocator);
}
|
||
|
||
const Error = error{ AssembleFail, OutOfMemory };
|
||
|
||
/// Tokenizes `src` and then parses and processes it one instruction at a time.
///
/// Returns `error.AssembleFail` when any diagnostic was recorded in
/// `ass.errors`, and `error.OutOfMemory` when an allocation fails.
/// Entries already present in `ass.value_map` are left in place.
pub fn assemble(ass: *Assembler, src: []const u8) Error!void {
    const gpa = ass.cg.module.gpa;

    ass.src = src;

    // Drop diagnostics of a previous run. The messages are owned by the list
    // entries, so they must be freed before the list is cleared.
    for (ass.errors.items) |err| gpa.free(err.msg);
    ass.errors.clearRetainingCapacity();

    // `tokenize` rebuilds the token list from scratch, so the cursor has to
    // start at the first token again.
    ass.current_token = 0;

    // Populate the opcode map if it isn't already.
    if (ass.inst_map.count() == 0) {
        const instructions = spec.InstructionSet.core.instructions();
        try ass.inst_map.ensureUnusedCapacity(gpa, @intCast(instructions.len));
        for (instructions, 0..) |inst, i| {
            const entry = ass.inst_map.getOrPutAssumeCapacity(inst.name);
            // `parseInstruction` uses the map index to index `instructions`.
            assert(entry.index == i);
        }
    }

    try ass.tokenize();
    while (!ass.testToken(.eof)) {
        try ass.parseInstruction();
        try ass.processInstruction();
    }

    if (ass.errors.items.len > 0) return error.AssembleFail;
}
|
||
|
||
/// A single diagnostic produced while assembling.
const ErrorMsg = struct {
    /// The offset in bytes from the start of `src` at which this error occurred.
    byte_offset: u32,
    /// Human readable description of the problem.
    msg: []const u8,
};
|
||
|
||
/// Formats `fmt` with `args` and records the result in `ass.errors`, tagged
/// with `offset`. The message is allocated with the module's gpa.
fn addError(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) !void {
    const allocator = ass.cg.module.gpa;

    // Reserve the list slot first: once the message exists it can be stored
    // without a failure path that would have to free it again.
    try ass.errors.ensureUnusedCapacity(allocator, 1);
    const text = try std.fmt.allocPrint(allocator, fmt, args);
    ass.errors.appendAssumeCapacity(.{ .byte_offset = offset, .msg = text });
}
|
||
|
||
/// Records a diagnostic at `offset` and yields `error.AssembleFail` for the
/// caller to return. Yields `error.OutOfMemory` instead when the diagnostic
/// itself could not be allocated.
fn fail(ass: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) Error {
    ass.addError(offset, fmt, args) catch |err| return err;
    return error.AssembleFail;
}
|
||
|
||
/// Like `fail`, for functionality that is not implemented yet: the message is
/// prefixed with "todo: " and reported at byte offset 0.
fn todo(ass: *Assembler, comptime fmt: []const u8, args: anytype) Error {
    const prefixed = "todo: " ++ fmt;
    return ass.fail(0, prefixed, args);
}
|
||
|
||
/// A named value known to the assembler; stored in `value_map`.
const AsmValue = union(enum) {
    /// The results are stored in an array hash map, and can be referred
    /// to either by name (without the %), or by values of this index type.
    pub const Ref = u32;

    /// The RHS of the current instruction.
    just_declared,
    /// A placeholder for ref-ids of which the result-id is not yet known.
    /// It will be further resolved at a later stage to a more concrete forward reference.
    unresolved_forward_reference,
    /// A normal result produced by a different instruction.
    value: Id,
    /// A type registered into the module's type system.
    ty: Id,
    /// A pre-supplied constant integer value.
    constant: u32,
    /// A string value. `parseValueEnum` accepts it, via a placeholder, as the
    /// name of an enumerant.
    string: []const u8,

    /// Retrieve the result-id of this AsmValue. Asserts that this AsmValue
    /// is of a variant that allows the result to be obtained (not an unresolved
    /// forward declaration, not in the process of being declared, etc).
    pub fn resultId(value: AsmValue) Id {
        switch (value) {
            .value, .ty => |id| return id,
            // TODO: Lower this value as constant?
            .constant => unreachable,
            .just_declared,
            .unresolved_forward_reference,
            .string,
            => unreachable,
        }
    }
};
|
||
|
||
/// Attempt to process the instruction currently in `ass.inst`.
/// Depending on the opcode this registers a capability or extension, imports
/// an extended instruction set, records a type, or emits the instruction into
/// a section. When the instruction produces a result, that result is stored in
/// `ass.value_map` under the name on the left-hand side.
/// If this function returns `error.AssembleFail`, an explanatory
/// error message has already been emitted into `ass.errors`.
fn processInstruction(ass: *Assembler) !void {
    const module = ass.cg.module;
    const operands = ass.inst.operands.items;
    const strings = ass.inst.string_bytes.items;

    const result: AsmValue = switch (ass.inst.opcode) {
        .OpEntryPoint => return ass.fail(
            ass.currentToken().start,
            "cannot export entry points in assembly",
            .{},
        ),
        .OpExecutionMode, .OpExecutionModeId => return ass.fail(
            ass.currentToken().start,
            "cannot set execution mode in assembly",
            .{},
        ),
        .OpCapability => {
            try module.addCapability(@enumFromInt(operands[0].value));
            return;
        },
        .OpExtension => {
            const name_start = operands[0].string;
            const ext_name = std.mem.sliceTo(strings[name_start..], 0);
            try module.addExtension(ext_name);
            return;
        },
        .OpExtInstImport => blk: {
            const name_start = operands[1].string;
            const set_name = std.mem.sliceTo(strings[name_start..], 0);
            if (std.meta.stringToEnum(spec.InstructionSet, set_name)) |set_tag| {
                break :blk .{ .value = try module.importInstructionSet(set_tag) };
            }
            // NOTE(review): `name_start` is an offset into `inst.string_bytes`,
            // not into `src`, so the reported location is probably not the
            // intended one - confirm.
            return ass.fail(name_start, "unknown instruction set: {s}", .{set_name});
        },
        else => switch (ass.inst.opcode.class()) {
            .type_declaration => try ass.processTypeInstruction(),
            // Generic instructions without a result have nothing to record.
            else => (try ass.processGenericInstruction()) orelse return,
        },
    };

    const result_ref = ass.inst.result().?;
    const slot = &ass.value_map.values()[result_ref];
    if (slot.* == .just_declared) {
        slot.* = result;
        return;
    }

    // Anything other than `just_declared` means the name already had a value.
    // TODO: Improve source location.
    return ass.fail(0, "duplicate definition of %{s}", .{ass.value_map.keys()[result_ref]});
}
|
||
|
||
/// Processes the type-declaration instruction in `ass.inst` and returns the
/// resulting type as an `AsmValue.ty`.
/// Void, bool, int, float, vector and struct types are requested from the
/// module; the remaining supported types are emitted directly into the
/// module's globals section with a freshly allocated id.
/// Unsupported type opcodes are reported through `todo`.
fn processTypeInstruction(ass: *Assembler) !AsmValue {
    const cg = ass.cg;
    const module = cg.module;
    const gpa = module.gpa;
    const globals = &module.sections.globals;
    const operands = ass.inst.operands.items;

    const type_id = switch (ass.inst.opcode) {
        .OpTypeVoid => try module.voidType(),
        .OpTypeBool => try module.boolType(),
        .OpTypeInt => blk: {
            const raw_signedness = operands[2].literal32;
            const signedness: std.builtin.Signedness = switch (raw_signedness) {
                0 => .unsigned,
                1 => .signed,
                // TODO: Improve source location.
                else => return ass.fail(0, "{} is not a valid signedness (expected 0 or 1)", .{raw_signedness}),
            };
            const raw_width = operands[1].literal32;
            const width = std.math.cast(u16, raw_width) orelse
                return ass.fail(0, "int type of {} bits is too large", .{raw_width});
            break :blk try module.intType(signedness, width);
        },
        .OpTypeFloat => blk: {
            const bits = operands[1].literal32;
            if (bits != 16 and bits != 32 and bits != 64) {
                return ass.fail(0, "{} is not a valid bit count for floats (expected 16, 32 or 64)", .{bits});
            }
            break :blk try module.floatType(@intCast(bits));
        },
        .OpTypeVector => blk: {
            const child_type = try ass.resolveRefId(operands[1].ref_id);
            const len = operands[2].literal32;
            break :blk try module.vectorType(len, child_type);
        },
        .OpTypeArray => {
            // TODO: The length of an OpTypeArray is determined by a constant (which may be a spec constant),
            // and so some consideration must be taken when entering this in the type system.
            return ass.todo("process OpTypeArray", .{});
        },
        .OpTypeRuntimeArray => blk: {
            const element_type = try ass.resolveRefId(operands[1].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeRuntimeArray, .{
                .id_result = new_id,
                .element_type = element_type,
            });
            break :blk new_id;
        },
        .OpTypePointer => blk: {
            const storage_class: StorageClass = @enumFromInt(operands[1].value);
            const pointee_type = try ass.resolveRefId(operands[2].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypePointer, .{
                .id_result = new_id,
                .storage_class = storage_class,
                .type = pointee_type,
            });
            break :blk new_id;
        },
        .OpTypeStruct => blk: {
            const member_operands = operands[1..];

            // Borrow space from the code generator's scratch list and give it
            // back when this arm is left.
            const scratch_top = cg.id_scratch.items.len;
            defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);
            const member_types = try cg.id_scratch.addManyAsSlice(gpa, member_operands.len);

            for (member_operands, member_types) |member, *slot| {
                slot.* = try ass.resolveRefId(member.ref_id);
            }
            break :blk try module.structType(member_types, null, null, .none);
        },
        .OpTypeImage => blk: {
            const sampled_type = try ass.resolveRefId(operands[1].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeImage, .{
                .id_result = new_id,
                .sampled_type = sampled_type,
                .dim = @enumFromInt(operands[2].value),
                .depth = operands[3].literal32,
                .arrayed = operands[4].literal32,
                .ms = operands[5].literal32,
                .sampled = operands[6].literal32,
                .image_format = @enumFromInt(operands[7].value),
            });
            break :blk new_id;
        },
        .OpTypeSampler => blk: {
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeSampler, .{ .id_result = new_id });
            break :blk new_id;
        },
        .OpTypeSampledImage => blk: {
            const image_type = try ass.resolveRefId(operands[1].ref_id);
            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeSampledImage, .{
                .id_result = new_id,
                .image_type = image_type,
            });
            break :blk new_id;
        },
        .OpTypeFunction => blk: {
            const param_operands = operands[2..];
            const return_type = try ass.resolveRefId(operands[1].ref_id);

            const scratch_top = cg.id_scratch.items.len;
            defer cg.id_scratch.shrinkRetainingCapacity(scratch_top);
            const param_types = try cg.id_scratch.addManyAsSlice(gpa, param_operands.len);

            for (param_operands, param_types) |operand, *slot| {
                slot.* = try ass.resolveRefId(operand.ref_id);
            }

            const new_id = module.allocId();
            try globals.emit(gpa, .OpTypeFunction, .{
                .id_result = new_id,
                .return_type = return_type,
                .id_ref_2 = param_types,
            });
            break :blk new_id;
        },
        else => return ass.todo("process type instruction {s}", .{@tagName(ass.inst.opcode)}),
    };

    return .{ .ty = type_id };
}
|
||
|
||
/// Emits the instruction in `ass.inst` word by word into a section.
/// - No forward references are allowed in operands.
/// - Target section is determined from instruction type.
/// Returns the result of the instruction as an `AsmValue.value`, or null when
/// the instruction has no result-id operand.
fn processGenericInstruction(ass: *Assembler) !?AsmValue {
    const module = ass.cg.module;
    const gpa = module.gpa;
    const target = module.zcu.getTarget();
    const operands = ass.inst.operands.items;

    // Set for module-scope variables: their result-id is the id of the decl
    // allocated for them.
    var global_decl: ?Decl.Index = null;
    const section = switch (ass.inst.opcode.class()) {
        .constant_creation => &module.sections.globals,
        .annotation => &module.sections.annotations,
        .type_declaration => unreachable, // Handled elsewhere.
        else => switch (ass.inst.opcode) {
            .OpEntryPoint => unreachable,
            .OpExecutionMode, .OpExecutionModeId => &module.sections.execution_modes,
            .OpVariable => section: {
                const storage_class: spec.StorageClass = @enumFromInt(operands[2].value);
                if (storage_class == .function) break :section &ass.cg.prologue;

                const decl_index = try module.allocDecl(.global);
                global_decl = decl_index;

                // Before version 1.4, the interface's storage classes are limited to the Input and Output
                const is_interface = target.cpu.has(.spirv, .v1_4) or
                    storage_class == .input or
                    storage_class == .output;
                if (is_interface) {
                    try module.decl_deps.append(gpa, decl_index);
                }
                break :section &module.sections.globals;
            },
            else => &ass.cg.body,
        },
    };

    var emitted_result: ?Id = null;
    const header_index = section.instructions.items.len;
    // At this point we're not quite sure how many operands this instruction is
    // going to have, so insert 0 and patch up the actual opcode word later.
    try section.ensureUnusedCapacity(gpa, 1);
    section.writeWord(0);

    for (operands) |operand| {
        switch (operand) {
            .value, .literal32 => |word| {
                try section.ensureUnusedCapacity(gpa, 1);
                section.writeWord(word);
            },
            .literal64 => |dword| {
                try section.ensureUnusedCapacity(gpa, 2);
                section.writeDoubleWord(dword);
            },
            .result_id => {
                const new_id = if (global_decl) |decl_index|
                    module.declPtr(decl_index).result_id
                else
                    module.allocId();
                emitted_result = new_id;
                try section.ensureUnusedCapacity(gpa, 1);
                section.writeOperand(Id, new_id);
            },
            .ref_id => |index| {
                const referenced = try ass.resolveRef(index);
                try section.ensureUnusedCapacity(gpa, 1);
                section.writeOperand(Id, referenced.resultId());
            },
            .string => |offset| {
                const text = std.mem.sliceTo(ass.inst.string_bytes.items[offset..], 0);
                // Words needed for the text plus its zero terminator.
                const words = std.math.divCeil(usize, text.len + 1, @sizeOf(Word)) catch unreachable;
                try section.ensureUnusedCapacity(gpa, words);
                section.writeOperand(spec.LiteralString, text);
            },
        }
    }

    // Patch the placeholder: word count in the high half, opcode in the low half.
    const word_count: u16 = @intCast(section.instructions.items.len - header_index);
    const header: u32 = (@as(u32, word_count) << 16) | @intFromEnum(ass.inst.opcode);
    section.instructions.items[header_index] |= header;

    const result_id = emitted_result orelse return null;
    return .{ .value = result_id };
}
|
||
|
||
/// Looks up the value that `ref` refers to in `ass.value_map`.
/// Fails when that value is still `just_declared`, i.e. when the instruction
/// being processed uses its own result as an operand. Every other variant,
/// including `unresolved_forward_reference`, is returned as-is.
fn resolveMaybeForwardRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
    const value = ass.value_map.values()[ref];
    switch (value) {
        .just_declared => {
            const name = ass.value_map.keys()[ref];
            // TODO: Improve source location.
            return ass.fail(0, "self-referential parameter %{s}", .{name});
        },
        else => return value,
    }
}
|
||
|
||
/// Looks up the value that `ref` refers to and requires it to be defined.
/// Fails for self-references (via `resolveMaybeForwardRef`) and for names
/// that have been referenced but not yet declared.
fn resolveRef(ass: *Assembler, ref: AsmValue.Ref) !AsmValue {
    const resolved = try ass.resolveMaybeForwardRef(ref);
    switch (resolved) {
        .unresolved_forward_reference => {},
        // Already rejected by `resolveMaybeForwardRef`.
        .just_declared => unreachable,
        else => return resolved,
    }

    // TODO: Improve source location.
    return ass.fail(0, "reference to undeclared result-id %{s}", .{ass.value_map.keys()[ref]});
}
|
||
|
||
/// Resolves `ref` with `resolveRef` and returns the result-id of the value.
/// Asserts (through `AsmValue.resultId`) that the value has a result-id.
fn resolveRefId(ass: *Assembler, ref: AsmValue.Ref) !Id {
    return (try ass.resolveRef(ref)).resultId();
}
|
||
|
||
/// Parses one instruction starting at the current token into `ass.inst`:
/// an optional `%name =` prefix, the opcode, and then the operands that the
/// instruction set lists for that opcode.
/// A name on the left-hand side is entered into `ass.value_map` (as
/// `just_declared` when it is new) and becomes the `result_id` operand.
fn parseInstruction(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    // Start from a clean instruction.
    ass.inst.opcode = undefined;
    ass.inst.operands.clearRetainingCapacity();
    ass.inst.string_bytes.clearRetainingCapacity();

    const lhs_tok = ass.currentToken();
    var lhs_result: ?AsmValue.Ref = null;
    if (ass.eatToken(.result_id_assign)) {
        // Strip the leading sigil from the name.
        const name = ass.tokenText(lhs_tok)[1..];
        const slot = try ass.value_map.getOrPut(gpa, name);
        try ass.expectToken(.equals);
        if (!slot.found_existing) slot.value_ptr.* = .just_declared;
        lhs_result = @intCast(slot.index);
    }

    const opcode_tok = ass.currentToken();
    if (lhs_result == null and !ass.testToken(.opcode)) {
        return ass.fail(opcode_tok.start, "expected start of instruction, found {s}", .{opcode_tok.tag.name()});
    }
    try ass.expectToken(.opcode);

    const opcode_text = ass.tokenText(opcode_tok);
    const inst_index = ass.inst_map.getIndex(opcode_text) orelse
        return ass.fail(opcode_tok.start, "invalid opcode '{s}'", .{opcode_text});

    const inst = spec.InstructionSet.core.instructions()[inst_index];
    ass.inst.opcode = @enumFromInt(inst.opcode);

    const expected_operands = inst.operands;
    // This is a loop because the result-id is not always the first operand.
    var requires_lhs_result = false;
    for (expected_operands) |op| {
        if (op.kind == .id_result) {
            requires_lhs_result = true;
            break;
        }
    }

    if (lhs_result == null) {
        if (requires_lhs_result) {
            return ass.fail(opcode_tok.start, "opcode '{s}' expects result on left-hand side", .{@tagName(ass.inst.opcode)});
        }
    } else if (!requires_lhs_result) {
        return ass.fail(
            lhs_tok.start,
            "opcode '{s}' does not expect a result-id on the left-hand side",
            .{@tagName(ass.inst.opcode)},
        );
    }

    for (expected_operands) |operand| {
        if (operand.kind == .id_result) {
            // The result comes from the left-hand side, not from the operand list.
            try ass.inst.operands.append(gpa, .{ .result_id = lhs_result.? });
            continue;
        }

        switch (operand.quantifier) {
            .required => {
                if (ass.isAtInstructionBoundary()) {
                    return ass.fail(
                        ass.currentToken().start,
                        "missing required operand", // TODO: Operand name?
                        .{},
                    );
                }
                try ass.parseOperand(operand.kind);
            },
            .optional => {
                if (!ass.isAtInstructionBoundary()) {
                    try ass.parseOperand(operand.kind);
                }
            },
            .variadic => {
                while (!ass.isAtInstructionBoundary()) {
                    try ass.parseOperand(operand.kind);
                }
            },
        }
    }
}
|
||
|
||
/// Parses a single operand of the given kind at the current token and appends
/// it to `ass.inst.operands`. Dispatches on the kind's category first and on
/// the kind itself for the remaining literal kinds; kinds without a parser
/// are reported through `todo`.
fn parseOperand(ass: *Assembler, kind: spec.OperandKind) Error!void {
    switch (kind.category()) {
        .bit_enum => return ass.parseBitEnum(kind),
        .value_enum => return ass.parseValueEnum(kind),
        .id => return ass.parseRefId(),
        else => {},
    }

    switch (kind) {
        .literal_integer => try ass.parseLiteralInteger(),
        .literal_string => try ass.parseString(),
        .literal_context_dependent_number => try ass.parseContextDependentNumber(),
        .literal_ext_inst_integer => try ass.parseLiteralExtInstInteger(),
        .pair_id_ref_id_ref => try ass.parsePhiSource(),
        else => return ass.todo("parse operand of type {s}", .{@tagName(kind)}),
    }
}
|
||
|
||
/// Parses a bit mask written as `None` or as flag names separated by pipes,
/// and appends the combined mask as a `value` operand.
/// Also handles parsing any required extra operands.
fn parseBitEnum(ass: *Assembler, kind: spec.OperandKind) !void {
    const gpa = ass.cg.module.gpa;

    const first_tok = ass.currentToken();
    try ass.expectToken(.value);
    if (std.mem.eql(u8, ass.tokenText(first_tok), "None")) {
        return ass.inst.operands.append(gpa, .{ .value = 0 });
    }

    const enumerants = kind.enumerants();
    var mask: u32 = 0;
    var flag_tok = first_tok;
    while (true) {
        const flag_name = ass.tokenText(flag_tok);
        const flag = for (enumerants) |candidate| {
            if (std.mem.eql(u8, candidate.name, flag_name)) break candidate;
        } else return ass.fail(
            flag_tok.start,
            "'{s}' is not a valid flag for bitmask {s}",
            .{ flag_name, @tagName(kind) },
        );
        mask |= flag.value;

        // Another flag follows only after a pipe.
        if (!ass.eatToken(.pipe)) break;
        flag_tok = ass.currentToken();
        try ass.expectToken(.value);
    }

    try ass.inst.operands.append(gpa, .{ .value = mask });

    // Parameters are parsed in the order of `enumerants`, not in the order in
    // which the flags were written.
    // Assume values are sorted.
    // TODO: ensure in generator.
    for (enumerants) |enumerant| {
        if ((mask & enumerant.value) != 0) {
            for (enumerant.parameters) |param_kind| {
                if (ass.isAtInstructionBoundary()) {
                    return ass.fail(ass.currentToken().start, "missing required parameter for bit flag '{s}'", .{enumerant.name});
                }
                try ass.parseOperand(param_kind);
            }
        }
    }
}
|
||
|
||
/// Parses an enumerant of `kind` and appends its value as a `value` operand.
/// The enumerant may be written by name, as an integer equal to its value, or
/// as a placeholder that names a `constant` or `string` entry of `value_map`.
/// Also handles parsing any required extra operands (not for placeholders).
fn parseValueEnum(ass: *Assembler, kind: spec.OperandKind) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    if (ass.eatToken(.placeholder)) {
        const name = ass.tokenText(tok)[1..];
        const bound = ass.value_map.get(name) orelse
            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});

        const word: u32 = switch (bound) {
            // A constant is used verbatim; it is not checked against the enumerants.
            .constant => |literal32| literal32,
            .string => |str| by_name: {
                for (kind.enumerants()) |candidate| {
                    if (std.mem.eql(u8, candidate.name, str)) break :by_name candidate.value;
                }
                return ass.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ str, @tagName(kind) });
            },
            else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
        };
        try ass.inst.operands.append(gpa, .{ .value = word });
        return;
    }

    try ass.expectToken(.value);

    const text = ass.tokenText(tok);
    const enumerant = found: {
        // Text that parses as an integer is matched against enumerant values,
        // anything else against enumerant names.
        if (std.fmt.parseInt(u32, text, 0)) |number| {
            for (kind.enumerants()) |candidate| {
                if (candidate.value == number) break :found candidate;
            }
        } else |_| {
            for (kind.enumerants()) |candidate| {
                if (std.mem.eql(u8, candidate.name, text)) break :found candidate;
            }
        }
        return ass.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) });
    };

    try ass.inst.operands.append(gpa, .{ .value = enumerant.value });

    for (enumerant.parameters) |param_kind| {
        if (ass.isAtInstructionBoundary()) {
            return ass.fail(ass.currentToken().start, "missing required parameter for enum variant '{s}'", .{enumerant.name});
        }
        try ass.parseOperand(param_kind);
    }
}
|
||
|
||
/// Parses a `%name` reference and appends it as a `ref_id` operand.
/// A name that is not yet in `value_map` is entered as
/// `unresolved_forward_reference`.
fn parseRefId(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    try ass.expectToken(.result_id);

    // Strip the leading sigil from the name.
    const name = ass.tokenText(tok)[1..];
    const slot = try ass.value_map.getOrPut(gpa, name);
    if (!slot.found_existing) slot.value_ptr.* = .unresolved_forward_reference;

    try ass.inst.operands.append(gpa, .{ .ref_id = @intCast(slot.index) });
}
|
||
|
||
/// Parses a 32-bit integer literal, or a placeholder that names a `constant`
/// entry of `value_map`, and appends it as a `literal32` operand.
fn parseLiteralInteger(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    if (!ass.eatToken(.placeholder)) {
        try ass.expectToken(.value);
        // According to the SPIR-V machine readable grammar, a LiteralInteger
        // may consist of one or more words. From the SPIR-V docs it seems like there
        // is only one instruction where multiple words are allowed, the literals that make up the
        // switch cases of OpSwitch. This case is handled separately, and so we just assume
        // everything is a 32-bit integer in this function.
        const text = ass.tokenText(tok);
        const number = std.fmt.parseInt(u32, text, 0) catch
            return ass.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
        return ass.inst.operands.append(gpa, .{ .literal32 = number });
    }

    // Placeholder: strip the sigil and look the name up.
    const name = ass.tokenText(tok)[1..];
    const bound = ass.value_map.get(name) orelse
        return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
    switch (bound) {
        .constant => |literal32| try ass.inst.operands.append(gpa, .{ .literal32 = literal32 }),
        else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
    }
}
|
||
|
||
/// Parses the instruction number of an extended instruction and appends it as
/// a `literal32` operand. The accepted syntax, the diagnostics and the emitted
/// operand are exactly those of `parseLiteralInteger`, so this forwards to it.
fn parseLiteralExtInstInteger(ass: *Assembler) !void {
    return ass.parseLiteralInteger();
}
|
||
|
||
/// Parses a string token, copies its contents (without the quotes) plus a
/// zero terminator into `inst.string_bytes`, and appends a `string` operand
/// holding the offset of the copy.
fn parseString(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    try ass.expectToken(.string);

    // Note, the string might not have a closing quote. In this case,
    // an error is already emitted but we are trying to continue processing
    // anyway, so in this function we have to deal with that situation.
    const text = ass.tokenText(tok);
    assert(text.len > 0 and text[0] == '"');
    // A lone `"` is the opening quote, not a closing one.
    const has_closing_quote = text.len != 1 and text[text.len - 1] == '"';
    const content_end = if (has_closing_quote) text.len - 1 else text.len;
    const content = text[1..content_end];

    const bytes = &ass.inst.string_bytes;
    const offset: u32 = @intCast(bytes.items.len);
    try bytes.ensureUnusedCapacity(gpa, content.len + 1);
    bytes.appendSliceAssumeCapacity(content);
    bytes.appendAssumeCapacity(0);

    try ass.inst.operands.append(gpa, .{ .string = offset });
}
|
||
|
||
/// Parses the literal of an `OpConstant` / `OpSpecConstant`, whose encoding
/// depends on the result type given as the first operand of the instruction.
/// Integer types are handled by `parseContextDependentInt`, float types by
/// `parseContextDependentFloat`; any other result type is an error.
fn parseContextDependentNumber(ass: *Assembler) !void {
    const module = ass.cg.module;

    // For context dependent numbers, the actual type to parse is determined by the instruction.
    // Currently, this operand appears in OpConstant and OpSpecConstant, where the to-be-parsed type
    // is determined by the result type. That means that in these instructions we have to resolve the
    // operand type early and look at the result to see how we need to proceed.
    assert(ass.inst.opcode == .OpConstant or ass.inst.opcode == .OpSpecConstant);

    const tok = ass.currentToken();
    const result = try ass.resolveRef(ass.inst.operands.items[0].ref_id);
    const result_id = result.resultId();
    // We are going to cheat a little bit: The types we are interested in, int and float,
    // are added to the module and cached via module.intType and module.floatType. Therefore,
    // we can determine the width of these types by directly checking the cache.
    // This only works if the Assembler and codegen both use spv.intType and spv.floatType though.
    // We don't expect there to be many of these types, so just look it up every time.
    // TODO: Could be improved to be a little bit more efficient.

    {
        var it = module.cache.int_types.iterator();
        while (it.next()) |entry| {
            if (entry.value_ptr.* != result_id) continue;
            const info = entry.key_ptr.*;
            return try ass.parseContextDependentInt(info.signedness, info.bits);
        }
    }

    {
        var it = module.cache.float_types.iterator();
        while (it.next()) |entry| {
            if (entry.value_ptr.* != result_id) continue;
            const info = entry.key_ptr.*;
            switch (info.bits) {
                16 => try ass.parseContextDependentFloat(16),
                32 => try ass.parseContextDependentFloat(32),
                64 => try ass.parseContextDependentFloat(64),
                else => return ass.fail(tok.start, "cannot parse {}-bit float literal", .{info.bits}),
            }
            // The literal has been parsed and appended. Without this return
            // control would reach the failure below and every float constant
            // would be rejected.
            return;
        }
    }

    return ass.fail(tok.start, "cannot parse literal constant", .{});
}
|
||
|
||
/// Parses an integer literal for an integer type of `width` bits with the
/// given signedness and appends it as `literal32` (width of at most one word)
/// or `literal64`. Widths of 0 or of more than two words are rejected, as are
/// values outside the range of the type.
/// A placeholder that names a `constant` entry of `value_map` is accepted in
/// place of the literal.
fn parseContextDependentInt(ass: *Assembler, signedness: std.builtin.Signedness, width: u32) !void {
    const gpa = ass.cg.module.gpa;

    const tok = ass.currentToken();
    if (ass.eatToken(.placeholder)) {
        const name = ass.tokenText(tok)[1..];
        const bound = ass.value_map.get(name) orelse
            return ass.fail(tok.start, "invalid placeholder '${s}'", .{name});
        switch (bound) {
            // NOTE(review): this emits a single word whatever `width` is, and
            // runs before the width check below - confirm that this is the
            // intended encoding for types wider than one word.
            .constant => |literal32| try ass.inst.operands.append(gpa, .{ .literal32 = literal32 }),
            else => return ass.fail(tok.start, "value '{s}' cannot be used as placeholder", .{name}),
        }
        return;
    }

    try ass.expectToken(.value);

    if (width == 0 or width > 2 * @bitSizeOf(spec.Word)) {
        return ass.fail(tok.start, "cannot parse {}-bit integer literal", .{width});
    }

    const text = ass.tokenText(tok);

    // Number of bits that hold the magnitude: everything except the sign bit.
    const is_signed = signedness == .signed;
    const magnitude_bits: u7 = @intCast(width - @intFromBool(is_signed));
    const limit = @as(i128, 1) << magnitude_bits;
    const min: i128 = if (is_signed) -limit else 0;
    const max: i128 = limit - 1;

    // Just parse the integer as the next larger integer type, and check if it overflows afterwards.
    if (std.fmt.parseInt(i128, text, 0)) |int| {
        if (int >= min and int <= max) {
            // Note, we store the sign-extended version here.
            const bits: u128 = @bitCast(int);
            if (width > @bitSizeOf(spec.Word)) {
                try ass.inst.operands.append(gpa, .{ .literal64 = @truncate(bits) });
            } else {
                try ass.inst.operands.append(gpa, .{ .literal32 = @truncate(bits) });
            }
            return;
        }
    } else |_| {}

    return ass.fail(tok.start, "'{s}' is not a valid {s} {}-bit int literal", .{ text, @tagName(signedness), width });
}
|
||
|
||
/// Parses a float literal of `width` bits and appends its bit pattern as a
/// `literal32` (width of at most one word) or `literal64` operand.
fn parseContextDependentFloat(ass: *Assembler, comptime width: u16) !void {
    const gpa = ass.cg.module.gpa;

    const Float = std.meta.Float(width);
    const Bits = std.meta.Int(.unsigned, width);

    const tok = ass.currentToken();
    try ass.expectToken(.value);
    const text = ass.tokenText(tok);

    const parsed = std.fmt.parseFloat(Float, text) catch
        return ass.fail(tok.start, "'{s}' is not a valid {}-bit float literal", .{ text, width });

    // Reinterpret the float as an unsigned integer of the same size.
    const raw: Bits = @bitCast(parsed);
    if (width > @bitSizeOf(spec.Word)) {
        assert(width <= 2 * @bitSizeOf(spec.Word));
        try ass.inst.operands.append(gpa, .{ .literal64 = raw });
    } else {
        try ass.inst.operands.append(gpa, .{ .literal32 = raw });
    }
}
|
||
|
||
/// Parses one phi source: two consecutive references, appended as two
/// `ref_id` operands. Fails when the instruction ends after the first.
fn parsePhiSource(ass: *Assembler) !void {
    try ass.parseRefId();
    if (!ass.isAtInstructionBoundary()) {
        return ass.parseRefId();
    }
    return ass.fail(ass.currentToken().start, "missing phi block parent", .{});
}
|
||
|
||
/// Returns whether the `current_token` cursor
/// is currently pointing at the start of a new instruction,
/// or at the end of the input.
fn isAtInstructionBoundary(ass: Assembler) bool {
    const tag = ass.currentToken().tag;
    return tag == .opcode or tag == .result_id_assign or tag == .eof;
}
|
||
|
||
/// Consume the current token if it has tag `tag`; otherwise report an
/// "unexpected ..., expected ..." error at the current token's start.
fn expectToken(ass: *Assembler, tag: Token.Tag) !void {
    if (!ass.eatToken(tag)) {
        const found = ass.currentToken();
        return ass.fail(found.start, "unexpected {s}, expected {s}", .{
            found.tag.name(),
            tag.name(),
        });
    }
}
|
||
|
||
/// Advance the cursor past the current token when it has tag `tag`.
/// Returns whether the token matched (and was therefore consumed).
fn eatToken(ass: *Assembler, tag: Token.Tag) bool {
    const matched = ass.testToken(tag);
    if (matched) ass.current_token += 1;
    return matched;
}
|
||
|
||
/// Check, without consuming anything, whether the current token has tag `tag`.
fn testToken(ass: Assembler, tag: Token.Tag) bool {
    const current = ass.currentToken();
    return current.tag == tag;
}
|
||
|
||
/// Return the token at the `current_token` cursor.
fn currentToken(ass: Assembler) Token {
    const all_tokens = ass.tokens.items;
    return all_tokens[ass.current_token];
}
|
||
|
||
/// Return the slice of `ass.src` covered by `tok`, i.e. the bytes from
/// `tok.start` up to (not including) `tok.end`.
fn tokenText(ass: Assembler, tok: Token) []const u8 {
    const begin = tok.start;
    const finish = tok.end;
    return ass.src[begin..finish];
}
|
||
|
||
/// Tokenize `ass.src` and put the tokens in `ass.tokens`.
/// Any previously stored tokens are discarded first. Tokens are produced by
/// repeated calls to `nextToken`, up to and including the final `.eof` token.
/// A `.result_id` token that is directly followed by an `.equals` token is
/// retagged as `.result_id_assign`.
fn tokenize(ass: *Assembler) !void {
    const gpa = ass.cg.module.gpa;

    ass.tokens.clearRetainingCapacity();

    var offset: u32 = 0;
    while (true) {
        const tok = try ass.nextToken(offset);
        // Resolve result-id assignment now.
        // NOTE: If the previous token wasn't a result-id, just ignore it,
        // we will catch it while parsing.
        // The length check matters when `=` is the very first token: there is
        // no previous token then, and `len - 1` must not be evaluated.
        if (tok.tag == .equals and ass.tokens.items.len > 0) {
            const prev = &ass.tokens.items[ass.tokens.items.len - 1];
            if (prev.tag == .result_id) {
                prev.tag = .result_id_assign;
            }
        }
        try ass.tokens.append(gpa, tok);
        if (tok.tag == .eof)
            break;
        // Continue scanning right after the token that was just produced.
        offset = tok.end;
    }
}
|
||
|
||
/// A lexical token: a tag plus the byte range `start..end` (end exclusive)
/// that it covers in the assembly source.
const Token = struct {
    tag: Tag,
    start: u32,
    end: u32,

    const Tag = enum {
        /// Produced when no more input is left to match.
        eof,
        /// %identifier
        result_id,
        /// %identifier when it appears on the left-hand side of an equals sign.
        /// Strictly speaking this is not a token of its own, but it's
        /// relatively easy to resolve during lexical analysis, and doing so
        /// relieves a bunch of headaches during parsing.
        result_id_assign,
        /// Mask, int, or float. These share one tag because some SPIR-V
        /// enumerants look a bit like integers as well (for example "3D"),
        /// so it is easier to interpret the text as the expected type when
        /// resolving an instruction's operands.
        value,
        /// An enumerant that looks like an opcode, that is, OpXxxx.
        /// It is not necessarily a *valid* opcode.
        opcode,
        /// String literal.
        /// This tag is also used for unterminated strings, in which case
        /// the closing " is not present.
        string,
        /// |.
        pipe,
        /// =.
        equals,
        /// $identifier. Used (for now) for constant values, like integers.
        /// These can be used in place of a normal `value`.
        placeholder,

        /// Human-readable description of the tag, used in error messages.
        fn name(self: Tag) []const u8 {
            return switch (self) {
                .pipe => "'|'",
                .equals => "'='",
                .eof => "<end of input>",
                .value => "<value>",
                .opcode => "<opcode>",
                .string => "<string literal>",
                .placeholder => "<placeholder>",
                .result_id => "<result-id>",
                .result_id_assign => "<assigned result-id>",
            };
        }
    };
};
|
||
|
||
/// Scan `ass.src` from `start_offset` and return the next token.
/// Problems found while scanning (for example a missing delimiter or an
/// unterminated string) are recorded with `ass.addError`; a token is still
/// returned in those cases. The token text is not interpreted here, except
/// that a value of the form `OpXxxx` is tagged as `.opcode`.
/// NOTE: `.result_id_assign` is never produced here - that is handled in tokenize().
fn nextToken(ass: *Assembler, start_offset: u32) !Token {
    // The input is split into the following categories:
    // - Whitespace. Skipped, but it also terminates most tokens.
    // - Values. Integers, floats, enumerants - any run of characters that
    //   is ended by whitespace or a sigil.
    // - Result-IDs and placeholders. A '%' or '$' followed by alphanumeric
    //   characters and '_'. Unlike values, these are validated right here.
    // - Strings. Quote-delimited text such as "abc". A backslash makes the
    //   lexer skip the character that follows it.
    // - Sigils, '=' and '|'. They act as delimiters and need no whitespace
    //   around them (SPIRV-Tools, by contrast, requires it).
    const Mode = enum {
        start,
        value,
        result_id,
        placeholder,
        string,
        escape,
        string_end,
    };

    var mode: Mode = .start;
    var kind: Token.Tag = .eof;
    var begin = start_offset;
    var cursor = start_offset;

    scan: while (cursor < ass.src.len) : (cursor += 1) {
        const byte = ass.src[cursor];
        switch (mode) {
            .start => switch (byte) {
                // Leading whitespace: the token begins after it.
                ' ', '\t', '\r', '\n' => begin = cursor + 1,
                // Sigils are complete one-character tokens.
                '|', '=' => {
                    kind = if (byte == '|') .pipe else .equals;
                    cursor += 1;
                    break :scan;
                },
                '%' => {
                    mode = .result_id;
                    kind = .result_id;
                },
                '$' => {
                    mode = .placeholder;
                    kind = .placeholder;
                },
                '"' => {
                    mode = .string;
                    kind = .string;
                },
                else => {
                    mode = .value;
                    kind = .value;
                },
            },
            .value => switch (byte) {
                ' ', '\t', '\r', '\n', '=', '|' => break :scan,
                '"' => {
                    try ass.addError(cursor, "unexpected string literal", .{});
                    // The user most likely just forgot a delimiter here - the
                    // tag stays `.value`.
                    break :scan;
                },
                else => {},
            },
            .result_id, .placeholder => switch (byte) {
                ' ', '\t', '\r', '\n', '=', '|' => break :scan,
                '_', 'a'...'z', 'A'...'Z', '0'...'9' => {},
                else => {
                    try ass.addError(cursor, "illegal character in result-id or placeholder", .{});
                    // Again, probably a forgotten delimiter.
                    break :scan;
                },
            },
            .string => switch (byte) {
                '"' => mode = .string_end,
                '\\' => mode = .escape,
                else => {}, // Note, strings may include newlines
            },
            // An escape simply skips the character after the backslash.
            .escape => mode = .string,
            .string_end => switch (byte) {
                ' ', '\t', '\r', '\n', '=', '|' => break :scan,
                else => {
                    try ass.addError(cursor, "unexpected character after string literal", .{});
                    // The token is still unmistakably a string.
                    break :scan;
                },
            },
        }
    }

    var tok: Token = .{
        .tag = kind,
        .start = begin,
        .end = cursor,
    };

    // Checks that depend on the state in which scanning stopped.
    switch (mode) {
        .start, .placeholder, .string_end => {},
        .string, .escape => try ass.addError(begin, "unterminated string", .{}),
        .result_id => {
            // Only the '%' was seen.
            if (cursor - begin == 1) {
                try ass.addError(begin, "result-id must have at least one name character", .{});
            }
        },
        .value => {
            // A value of the form "Op" + uppercase letter + ... is an opcode.
            const text = ass.tokenText(tok);
            const prefix = "Op";
            if (text.len > prefix.len and
                std.mem.startsWith(u8, text, prefix) and
                std.ascii.isUpper(text[prefix.len]))
            {
                tok.tag = .opcode;
            }
        },
    }

    return tok;
}
|