stage2: miscellaneous fixes for the branch

* Breaking language change: inline assembly must use string literal
   syntax. This is in preparation for inline assembly improvements that
   involve more integration with the Zig language. This means we cannot
   rely on text substitution.
 * Liveness: properly handle inline assembly and function calls with
   more than 3 operands.
   - More than 35 operands are not yet supported. This is a low priority
     to implement.
   - This required implementation in codegen.zig as well.
 * Liveness: fix bug causing incorrect tomb bits.
 * Sema: enable switch expressions that are evaluated at compile-time.
   - Runtime switch instructions still need to be reworked in this
     branch. There was a TODO left here (by me) with a suggestion to do
     some bigger changes as part of the AIR memory reworking. Now that
     time has come and I plan to honor the suggestion in a future commit
     before merging this branch.
 * AIR printing: fix missing ')' on alive instructions.

We're back to "hello world" working for the x86_64 backend.
This commit is contained in:
Andrew Kelley 2021-07-16 23:06:59 -07:00
parent 2438f61f1c
commit d17f492017
8 changed files with 428 additions and 219 deletions

View file

@ -505,8 +505,8 @@ const LinuxThreadImpl = struct {
/// Ported over from musl libc's pthread detached implementation: /// Ported over from musl libc's pthread detached implementation:
/// https://github.com/ifduyue/musl/search?q=__unmapself /// https://github.com/ifduyue/musl/search?q=__unmapself
fn freeAndExit(self: *ThreadCompletion) noreturn { fn freeAndExit(self: *ThreadCompletion) noreturn {
const unmap_and_exit: []const u8 = switch (target.cpu.arch) { switch (target.cpu.arch) {
.i386 => ( .i386 => asm volatile (
\\ movl $91, %%eax \\ movl $91, %%eax
\\ movl %[ptr], %%ebx \\ movl %[ptr], %%ebx
\\ movl %[len], %%ecx \\ movl %[len], %%ecx
@ -514,8 +514,12 @@ const LinuxThreadImpl = struct {
\\ movl $1, %%eax \\ movl $1, %%eax
\\ movl $0, %%ebx \\ movl $0, %%ebx
\\ int $128 \\ int $128
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.x86_64 => ( .x86_64 => asm volatile (
\\ movq $11, %%rax \\ movq $11, %%rax
\\ movq %[ptr], %%rbx \\ movq %[ptr], %%rbx
\\ movq %[len], %%rcx \\ movq %[len], %%rcx
@ -523,8 +527,12 @@ const LinuxThreadImpl = struct {
\\ movq $60, %%rax \\ movq $60, %%rax
\\ movq $1, %%rdi \\ movq $1, %%rdi
\\ syscall \\ syscall
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.arm, .armeb, .thumb, .thumbeb => ( .arm, .armeb, .thumb, .thumbeb => asm volatile (
\\ mov r7, #91 \\ mov r7, #91
\\ mov r0, %[ptr] \\ mov r0, %[ptr]
\\ mov r1, %[len] \\ mov r1, %[len]
@ -532,8 +540,12 @@ const LinuxThreadImpl = struct {
\\ mov r7, #1 \\ mov r7, #1
\\ mov r0, #0 \\ mov r0, #0
\\ svc 0 \\ svc 0
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.aarch64, .aarch64_be, .aarch64_32 => ( .aarch64, .aarch64_be, .aarch64_32 => asm volatile (
\\ mov x8, #215 \\ mov x8, #215
\\ mov x0, %[ptr] \\ mov x0, %[ptr]
\\ mov x1, %[len] \\ mov x1, %[len]
@ -541,8 +553,12 @@ const LinuxThreadImpl = struct {
\\ mov x8, #93 \\ mov x8, #93
\\ mov x0, #0 \\ mov x0, #0
\\ svc 0 \\ svc 0
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.mips, .mipsel => ( .mips, .mipsel => asm volatile (
\\ move $sp, $25 \\ move $sp, $25
\\ li $2, 4091 \\ li $2, 4091
\\ move $4, %[ptr] \\ move $4, %[ptr]
@ -551,8 +567,12 @@ const LinuxThreadImpl = struct {
\\ li $2, 4001 \\ li $2, 4001
\\ li $4, 0 \\ li $4, 0
\\ syscall \\ syscall
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.mips64, .mips64el => ( .mips64, .mips64el => asm volatile (
\\ li $2, 4091 \\ li $2, 4091
\\ move $4, %[ptr] \\ move $4, %[ptr]
\\ move $5, %[len] \\ move $5, %[len]
@ -560,8 +580,12 @@ const LinuxThreadImpl = struct {
\\ li $2, 4001 \\ li $2, 4001
\\ li $4, 0 \\ li $4, 0
\\ syscall \\ syscall
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.powerpc, .powerpcle, .powerpc64, .powerpc64le => ( .powerpc, .powerpcle, .powerpc64, .powerpc64le => asm volatile (
\\ li 0, 91 \\ li 0, 91
\\ mr %[ptr], 3 \\ mr %[ptr], 3
\\ mr %[len], 4 \\ mr %[len], 4
@ -570,8 +594,12 @@ const LinuxThreadImpl = struct {
\\ li 3, 0 \\ li 3, 0
\\ sc \\ sc
\\ blr \\ blr
:
: [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len)
: "memory"
), ),
.riscv64 => ( .riscv64 => asm volatile (
\\ li a7, 215 \\ li a7, 215
\\ mv a0, %[ptr] \\ mv a0, %[ptr]
\\ mv a1, %[len] \\ mv a1, %[len]
@ -579,19 +607,13 @@ const LinuxThreadImpl = struct {
\\ li a7, 93 \\ li a7, 93
\\ mv a0, zero \\ mv a0, zero
\\ ecall \\ ecall
),
else => |cpu_arch| {
@compileLog("Unsupported linux arch ", cpu_arch);
},
};
asm volatile (unmap_and_exit
: :
: [ptr] "r" (@ptrToInt(self.mapped.ptr)), : [ptr] "r" (@ptrToInt(self.mapped.ptr)),
[len] "r" (self.mapped.len) [len] "r" (self.mapped.len)
: "memory" : "memory"
); ),
else => |cpu_arch| @compileError("Unsupported linux arch: " ++ @tagName(cpu_arch)),
}
unreachable; unreachable;
} }
}; };

View file

@ -46,34 +46,38 @@ test "fence/compilerFence" {
/// Signals to the processor that the caller is inside a busy-wait spin-loop. /// Signals to the processor that the caller is inside a busy-wait spin-loop.
pub inline fn spinLoopHint() void { pub inline fn spinLoopHint() void {
const hint_instruction = switch (target.cpu.arch) { switch (target.cpu.arch) {
// No-op instruction that can hint to save (or share with a hardware-thread) pipelining/power resources // No-op instruction that can hint to save (or share with a hardware-thread)
// pipelining/power resources
// https://software.intel.com/content/www/us/en/develop/articles/benefitting-power-and-performance-sleep-loops.html // https://software.intel.com/content/www/us/en/develop/articles/benefitting-power-and-performance-sleep-loops.html
.i386, .x86_64 => "pause", .i386, .x86_64 => asm volatile ("pause" ::: "memory"),
// No-op instruction that serves as a hardware-thread resource yield hint. // No-op instruction that serves as a hardware-thread resource yield hint.
// https://stackoverflow.com/a/7588941 // https://stackoverflow.com/a/7588941
.powerpc64, .powerpc64le => "or 27, 27, 27", .powerpc64, .powerpc64le => asm volatile ("or 27, 27, 27" ::: "memory"),
// `isb` appears more reliable for releasing execution resources than `yield` on common aarch64 CPUs. // `isb` appears more reliable for releasing execution resources than `yield`
// on common aarch64 CPUs.
// https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8258604 // https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8258604
// https://bugs.mysql.com/bug.php?id=100664 // https://bugs.mysql.com/bug.php?id=100664
.aarch64, .aarch64_be, .aarch64_32 => "isb", .aarch64, .aarch64_be, .aarch64_32 => asm volatile ("isb" ::: "memory"),
// `yield` was introduced in v6k but is also available on v6m. // `yield` was introduced in v6k but is also available on v6m.
// https://www.keil.com/support/man/docs/armasm/armasm_dom1361289926796.htm // https://www.keil.com/support/man/docs/armasm/armasm_dom1361289926796.htm
.arm, .armeb, .thumb, .thumbeb => blk: { .arm, .armeb, .thumb, .thumbeb => {
const can_yield = comptime std.Target.arm.featureSetHasAny(target.cpu.features, .{ .has_v6k, .has_v6m }); const can_yield = comptime std.Target.arm.featureSetHasAny(target.cpu.features, .{
const instruction = if (can_yield) "yield" else ""; .has_v6k, .has_v6m,
break :blk instruction; });
if (can_yield) {
asm volatile ("yield" ::: "memory");
} else {
asm volatile ("" ::: "memory");
}
}, },
else => "",
};
// Memory barrier to prevent the compiler from optimizing away the spin-loop // Memory barrier to prevent the compiler from optimizing away the spin-loop
// even if no hint_instruction was provided. // even if no hint_instruction was provided.
asm volatile (hint_instruction ::: "memory"); else => asm volatile ("" ::: "memory"),
}
} }
test "spinLoopHint" { test "spinLoopHint" {

View file

@ -178,26 +178,78 @@ pub fn Atomic(comptime T: type) type {
) u1 { ) u1 {
// x86 supports dedicated bitwise instructions // x86 supports dedicated bitwise instructions
if (comptime target.cpu.arch.isX86() and @sizeOf(T) >= 2 and @sizeOf(T) <= 8) { if (comptime target.cpu.arch.isX86() and @sizeOf(T) >= 2 and @sizeOf(T) <= 8) {
const instruction = switch (op) { const old_bit: u8 = switch (@sizeOf(T)) {
.Set => "lock bts", 2 => switch (op) {
.Reset => "lock btr", .Set => asm volatile ("lock btsw %[bit], %[ptr]"
.Toggle => "lock btc", // LLVM doesn't support u1 flag register return values
}; : [result] "={@ccc}" (-> u8)
const suffix = switch (@sizeOf(T)) {
2 => "w",
4 => "l",
8 => "q",
else => @compileError("Invalid atomic type " ++ @typeName(T)),
};
const old_bit = asm volatile (instruction ++ suffix ++ " %[bit], %[ptr]"
: [result] "={@ccc}" (-> u8) // LLVM doesn't support u1 flag register return values
: [ptr] "*p" (&self.value), : [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit)) [bit] "X" (@as(T, bit))
: "cc", "memory" : "cc", "memory"
); ),
.Reset => asm volatile ("lock btrw %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
.Toggle => asm volatile ("lock btcw %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
},
4 => switch (op) {
.Set => asm volatile ("lock btsl %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
.Reset => asm volatile ("lock btrl %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
.Toggle => asm volatile ("lock btcl %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
},
8 => switch (op) {
.Set => asm volatile ("lock btsq %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
.Reset => asm volatile ("lock btrq %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
.Toggle => asm volatile ("lock btcq %[bit], %[ptr]"
// LLVM doesn't support u1 flag register return values
: [result] "={@ccc}" (-> u8)
: [ptr] "*p" (&self.value),
[bit] "X" (@as(T, bit))
: "cc", "memory"
),
},
else => @compileError("Invalid atomic type " ++ @typeName(T)),
};
return @intCast(u1, old_bit); return @intCast(u1, old_bit);
} }

View file

@ -6601,7 +6601,7 @@ fn asmExpr(
const asm_source = switch (node_tags[full.ast.template]) { const asm_source = switch (node_tags[full.ast.template]) {
.string_literal => try astgen.strLitAsString(main_tokens[full.ast.template]), .string_literal => try astgen.strLitAsString(main_tokens[full.ast.template]),
.multiline_string_literal => try astgen.strLitNodeAsString(full.ast.template), .multiline_string_literal => try astgen.strLitNodeAsString(full.ast.template),
else => return astgen.failNode(node, "assembly code must use string literal syntax", .{}), else => return astgen.failNode(full.ast.template, "assembly code must use string literal syntax", .{}),
}; };
// See https://github.com/ziglang/zig/issues/215 and related issues discussing // See https://github.com/ziglang/zig/issues/215 and related issues discussing

View file

@ -24,6 +24,11 @@ const Log2Int = std.math.Log2Int;
tomb_bits: []usize, tomb_bits: []usize,
/// Sparse table of specially handled instructions. The value is an index into the `extra` /// Sparse table of specially handled instructions. The value is an index into the `extra`
/// array. The meaning of the data depends on the AIR tag. /// array. The meaning of the data depends on the AIR tag.
/// * `cond_br` - points to a `CondBr` in `extra` at this index.
/// * `switch_br` - points to a `SwitchBr` in `extra` at this index.
/// * `asm`, `call` - the value is a set of bits which are the extra tomb bits of operands.
/// The main tomb bits are still used and the extra ones are starting with the lsb of the
/// value here.
special: std.AutoHashMapUnmanaged(Air.Inst.Index, u32), special: std.AutoHashMapUnmanaged(Air.Inst.Index, u32),
/// Auxilliary data. The way this data is interpreted is determined contextually. /// Auxilliary data. The way this data is interpreted is determined contextually.
extra: []const u32, extra: []const u32,
@ -67,6 +72,8 @@ pub fn analyze(gpa: *Allocator, air: Air, zir: Zir) Allocator.Error!Liveness {
defer a.extra.deinit(gpa); defer a.extra.deinit(gpa);
defer a.table.deinit(gpa); defer a.table.deinit(gpa);
std.mem.set(usize, a.tomb_bits, 0);
const main_body = air.getMainBody(); const main_body = air.getMainBody();
try a.table.ensureTotalCapacity(gpa, @intCast(u32, main_body.len)); try a.table.ensureTotalCapacity(gpa, @intCast(u32, main_body.len));
try analyzeWithContext(&a, null, main_body); try analyzeWithContext(&a, null, main_body);
@ -103,7 +110,7 @@ pub fn clearOperandDeath(l: Liveness, inst: Air.Inst.Index, operand: OperandInt)
const usize_index = (inst * bpi) / @bitSizeOf(usize); const usize_index = (inst * bpi) / @bitSizeOf(usize);
const mask = @as(usize, 1) << const mask = @as(usize, 1) <<
@intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi + operand); @intCast(Log2Int(usize), (inst % (@bitSizeOf(usize) / bpi)) * bpi + operand);
l.tomb_bits[usize_index] |= mask; l.tomb_bits[usize_index] &= ~mask;
} }
/// Higher level API. /// Higher level API.
@ -298,7 +305,17 @@ fn analyzeInst(
std.mem.copy(Air.Inst.Ref, buf[1..], args); std.mem.copy(Air.Inst.Ref, buf[1..], args);
return trackOperands(a, new_set, inst, main_tomb, buf); return trackOperands(a, new_set, inst, main_tomb, buf);
} }
@panic("TODO: liveness analysis for function call with greater than 2 args"); var extra_tombs: ExtraTombs = .{
.analysis = a,
.new_set = new_set,
.inst = inst,
.main_tomb = main_tomb,
};
try extra_tombs.feed(callee);
for (args) |arg| {
try extra_tombs.feed(arg);
}
return extra_tombs.finish();
}, },
.struct_field_ptr => { .struct_field_ptr => {
const extra = a.air.extraData(Air.StructField, inst_datas[inst].ty_pl.payload).data; const extra = a.air.extraData(Air.StructField, inst_datas[inst].ty_pl.payload).data;
@ -317,7 +334,19 @@ fn analyzeInst(
std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args); std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args);
return trackOperands(a, new_set, inst, main_tomb, buf); return trackOperands(a, new_set, inst, main_tomb, buf);
} }
@panic("TODO: liveness analysis for asm with greater than 3 args"); var extra_tombs: ExtraTombs = .{
.analysis = a,
.new_set = new_set,
.inst = inst,
.main_tomb = main_tomb,
};
for (outputs) |output| {
try extra_tombs.feed(output);
}
for (args) |arg| {
try extra_tombs.feed(arg);
}
return extra_tombs.finish();
}, },
.block => { .block => {
const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload); const extra = a.air.extraData(Air.Block, inst_datas[inst].ty_pl.payload);
@ -531,3 +560,40 @@ fn trackOperands(
} }
a.storeTombBits(inst, tomb_bits); a.storeTombBits(inst, tomb_bits);
} }
const ExtraTombs = struct {
analysis: *Analysis,
new_set: ?*std.AutoHashMapUnmanaged(Air.Inst.Index, void),
inst: Air.Inst.Index,
main_tomb: bool,
bit_index: usize = 0,
tomb_bits: Bpi = 0,
big_tomb_bits: u32 = 0,
fn feed(et: *ExtraTombs, op_ref: Air.Inst.Ref) !void {
const this_bit_index = et.bit_index;
assert(this_bit_index < 32); // TODO mechanism for when there are greater than 32 operands
et.bit_index += 1;
const gpa = et.analysis.gpa;
const op_int = @enumToInt(op_ref);
if (op_int < Air.Inst.Ref.typed_value_map.len) return;
const op_index: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len);
const prev = try et.analysis.table.fetchPut(gpa, op_index, {});
if (prev == null) {
// Death.
if (et.new_set) |ns| try ns.putNoClobber(gpa, op_index, {});
if (this_bit_index < bpi - 1) {
et.tomb_bits |= @as(Bpi, 1) << @intCast(OperandInt, this_bit_index);
} else {
const big_bit_index = this_bit_index - (bpi - 1);
et.big_tomb_bits |= @as(u32, 1) << @intCast(u5, big_bit_index);
}
}
}
fn finish(et: *ExtraTombs) !void {
et.tomb_bits |= @as(Bpi, @boolToInt(et.main_tomb)) << (bpi - 1);
et.analysis.storeTombBits(et.inst, et.tomb_bits);
try et.analysis.special.put(et.analysis.gpa, et.inst, et.big_tomb_bits);
}
};

View file

@ -258,24 +258,24 @@ pub fn analyzeBody(
.slice_sentinel => try sema.zirSliceSentinel(block, inst), .slice_sentinel => try sema.zirSliceSentinel(block, inst),
.slice_start => try sema.zirSliceStart(block, inst), .slice_start => try sema.zirSliceStart(block, inst),
.str => try sema.zirStr(block, inst), .str => try sema.zirStr(block, inst),
//.switch_block => try sema.zirSwitchBlock(block, inst, false, .none), .switch_block => try sema.zirSwitchBlock(block, inst, false, .none),
//.switch_block_multi => try sema.zirSwitchBlockMulti(block, inst, false, .none), .switch_block_multi => try sema.zirSwitchBlockMulti(block, inst, false, .none),
//.switch_block_else => try sema.zirSwitchBlock(block, inst, false, .@"else"), .switch_block_else => try sema.zirSwitchBlock(block, inst, false, .@"else"),
//.switch_block_else_multi => try sema.zirSwitchBlockMulti(block, inst, false, .@"else"), .switch_block_else_multi => try sema.zirSwitchBlockMulti(block, inst, false, .@"else"),
//.switch_block_under => try sema.zirSwitchBlock(block, inst, false, .under), .switch_block_under => try sema.zirSwitchBlock(block, inst, false, .under),
//.switch_block_under_multi => try sema.zirSwitchBlockMulti(block, inst, false, .under), .switch_block_under_multi => try sema.zirSwitchBlockMulti(block, inst, false, .under),
//.switch_block_ref => try sema.zirSwitchBlock(block, inst, true, .none), .switch_block_ref => try sema.zirSwitchBlock(block, inst, true, .none),
//.switch_block_ref_multi => try sema.zirSwitchBlockMulti(block, inst, true, .none), .switch_block_ref_multi => try sema.zirSwitchBlockMulti(block, inst, true, .none),
//.switch_block_ref_else => try sema.zirSwitchBlock(block, inst, true, .@"else"), .switch_block_ref_else => try sema.zirSwitchBlock(block, inst, true, .@"else"),
//.switch_block_ref_else_multi => try sema.zirSwitchBlockMulti(block, inst, true, .@"else"), .switch_block_ref_else_multi => try sema.zirSwitchBlockMulti(block, inst, true, .@"else"),
//.switch_block_ref_under => try sema.zirSwitchBlock(block, inst, true, .under), .switch_block_ref_under => try sema.zirSwitchBlock(block, inst, true, .under),
//.switch_block_ref_under_multi => try sema.zirSwitchBlockMulti(block, inst, true, .under), .switch_block_ref_under_multi => try sema.zirSwitchBlockMulti(block, inst, true, .under),
//.switch_capture => try sema.zirSwitchCapture(block, inst, false, false), .switch_capture => try sema.zirSwitchCapture(block, inst, false, false),
//.switch_capture_ref => try sema.zirSwitchCapture(block, inst, false, true), .switch_capture_ref => try sema.zirSwitchCapture(block, inst, false, true),
//.switch_capture_multi => try sema.zirSwitchCapture(block, inst, true, false), .switch_capture_multi => try sema.zirSwitchCapture(block, inst, true, false),
//.switch_capture_multi_ref => try sema.zirSwitchCapture(block, inst, true, true), .switch_capture_multi_ref => try sema.zirSwitchCapture(block, inst, true, true),
//.switch_capture_else => try sema.zirSwitchCaptureElse(block, inst, false), .switch_capture_else => try sema.zirSwitchCaptureElse(block, inst, false),
//.switch_capture_else_ref => try sema.zirSwitchCaptureElse(block, inst, true), .switch_capture_else_ref => try sema.zirSwitchCaptureElse(block, inst, true),
.type_info => try sema.zirTypeInfo(block, inst), .type_info => try sema.zirTypeInfo(block, inst),
.size_of => try sema.zirSizeOf(block, inst), .size_of => try sema.zirSizeOf(block, inst),
.bit_size_of => try sema.zirBitSizeOf(block, inst), .bit_size_of => try sema.zirBitSizeOf(block, inst),
@ -534,7 +534,6 @@ pub fn analyzeBody(
return break_inst; return break_inst;
} }
}, },
else => |t| @panic(@tagName(t)),
}; };
if (sema.typeOf(air_inst).isNoReturn()) if (sema.typeOf(air_inst).isNoReturn())
return always_noreturn; return always_noreturn;
@ -4110,8 +4109,8 @@ fn analyzeSwitch(
const body = sema.code.extra[extra_index..][0..body_len]; const body = sema.code.extra[extra_index..][0..body_len];
extra_index += body_len; extra_index += body_len;
const item = sema.resolveInst(item_ref);
// Validation above ensured these will succeed. // Validation above ensured these will succeed.
const item = sema.resolveInst(item_ref) catch unreachable;
const item_val = sema.resolveConstValue(&child_block, .unneeded, item) catch unreachable; const item_val = sema.resolveConstValue(&child_block, .unneeded, item) catch unreachable;
if (operand_val.eql(item_val)) { if (operand_val.eql(item_val)) {
return sema.resolveBlockBody(block, src, &child_block, body, merges); return sema.resolveBlockBody(block, src, &child_block, body, merges);
@ -4132,9 +4131,9 @@ fn analyzeSwitch(
const body = sema.code.extra[extra_index + 2 * ranges_len ..][0..body_len]; const body = sema.code.extra[extra_index + 2 * ranges_len ..][0..body_len];
for (items) |item_ref| { for (items) |item_ref| {
const item = sema.resolveInst(item_ref);
// Validation above ensured these will succeed. // Validation above ensured these will succeed.
const item = sema.resolveInst(item_ref) catch unreachable; const item_val = sema.resolveConstValue(&child_block, .unneeded, item) catch unreachable;
const item_val = sema.resolveConstValue(&child_block, item.src, item) catch unreachable;
if (operand_val.eql(item_val)) { if (operand_val.eql(item_val)) {
return sema.resolveBlockBody(block, src, &child_block, body, merges); return sema.resolveBlockBody(block, src, &child_block, body, merges);
} }
@ -4171,156 +4170,157 @@ fn analyzeSwitch(
// TODO when reworking AIR memory layout make multi cases get generated as cases, // TODO when reworking AIR memory layout make multi cases get generated as cases,
// not as part of the "else" block. // not as part of the "else" block.
const cases = try sema.arena.alloc(Inst.SwitchBr.Case, scalar_cases_len); return mod.fail(&block.base, src, "TODO rework runtime switch Sema", .{});
//const cases = try sema.arena.alloc(Inst.SwitchBr.Case, scalar_cases_len);
var case_block = child_block.makeSubBlock(); //var case_block = child_block.makeSubBlock();
case_block.runtime_loop = null; //case_block.runtime_loop = null;
case_block.runtime_cond = operand.src; //case_block.runtime_cond = operand.src;
case_block.runtime_index += 1; //case_block.runtime_index += 1;
defer case_block.instructions.deinit(gpa); //defer case_block.instructions.deinit(gpa);
var extra_index: usize = special.end; //var extra_index: usize = special.end;
var scalar_i: usize = 0; //var scalar_i: usize = 0;
while (scalar_i < scalar_cases_len) : (scalar_i += 1) { //while (scalar_i < scalar_cases_len) : (scalar_i += 1) {
const item_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); // const item_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
extra_index += 1; // extra_index += 1;
const body_len = sema.code.extra[extra_index]; // const body_len = sema.code.extra[extra_index];
extra_index += 1; // extra_index += 1;
const body = sema.code.extra[extra_index..][0..body_len]; // const body = sema.code.extra[extra_index..][0..body_len];
extra_index += body_len; // extra_index += body_len;
case_block.instructions.shrinkRetainingCapacity(0); // case_block.instructions.shrinkRetainingCapacity(0);
// We validate these above; these two calls are guaranteed to succeed. // const item = sema.resolveInst(item_ref);
const item = sema.resolveInst(item_ref) catch unreachable; // // We validate these above; these two calls are guaranteed to succeed.
const item_val = sema.resolveConstValue(&case_block, .unneeded, item) catch unreachable; // const item_val = sema.resolveConstValue(&case_block, .unneeded, item) catch unreachable;
_ = try sema.analyzeBody(&case_block, body); // _ = try sema.analyzeBody(&case_block, body);
cases[scalar_i] = .{ // cases[scalar_i] = .{
.item = item_val, // .item = item_val,
.body = .{ .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items) }, // .body = .{ .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items) },
}; // };
} //}
var first_else_body: Body = undefined; //var first_else_body: Body = undefined;
var prev_condbr: ?*Inst.CondBr = null; //var prev_condbr: ?*Inst.CondBr = null;
var multi_i: usize = 0; //var multi_i: usize = 0;
while (multi_i < multi_cases_len) : (multi_i += 1) { //while (multi_i < multi_cases_len) : (multi_i += 1) {
const items_len = sema.code.extra[extra_index]; // const items_len = sema.code.extra[extra_index];
extra_index += 1; // extra_index += 1;
const ranges_len = sema.code.extra[extra_index]; // const ranges_len = sema.code.extra[extra_index];
extra_index += 1; // extra_index += 1;
const body_len = sema.code.extra[extra_index]; // const body_len = sema.code.extra[extra_index];
extra_index += 1; // extra_index += 1;
const items = sema.code.refSlice(extra_index, items_len); // const items = sema.code.refSlice(extra_index, items_len);
extra_index += items_len; // extra_index += items_len;
case_block.instructions.shrinkRetainingCapacity(0); // case_block.instructions.shrinkRetainingCapacity(0);
var any_ok: ?Air.Inst.Index = null; // var any_ok: ?Air.Inst.Index = null;
for (items) |item_ref| { // for (items) |item_ref| {
const item = sema.resolveInst(item_ref); // const item = sema.resolveInst(item_ref);
_ = try sema.resolveConstValue(&child_block, item.src, item); // _ = try sema.resolveConstValue(&child_block, item.src, item);
const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item); // const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item);
if (any_ok) |some| { // if (any_ok) |some| {
any_ok = try case_block.addBinOp(.bool_or, some, cmp_ok); // any_ok = try case_block.addBinOp(.bool_or, some, cmp_ok);
} else { // } else {
any_ok = cmp_ok; // any_ok = cmp_ok;
} // }
} // }
var range_i: usize = 0; // var range_i: usize = 0;
while (range_i < ranges_len) : (range_i += 1) { // while (range_i < ranges_len) : (range_i += 1) {
const first_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); // const first_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
extra_index += 1; // extra_index += 1;
const last_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); // const last_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]);
extra_index += 1; // extra_index += 1;
const item_first = sema.resolveInst(first_ref); // const item_first = sema.resolveInst(first_ref);
const item_last = sema.resolveInst(last_ref); // const item_last = sema.resolveInst(last_ref);
_ = try sema.resolveConstValue(&child_block, item_first.src, item_first); // _ = try sema.resolveConstValue(&child_block, item_first.src, item_first);
_ = try sema.resolveConstValue(&child_block, item_last.src, item_last); // _ = try sema.resolveConstValue(&child_block, item_last.src, item_last);
// operand >= first and operand <= last // // operand >= first and operand <= last
const range_first_ok = try case_block.addBinOp( // const range_first_ok = try case_block.addBinOp(
.cmp_gte, // .cmp_gte,
operand, // operand,
item_first, // item_first,
); // );
const range_last_ok = try case_block.addBinOp( // const range_last_ok = try case_block.addBinOp(
.cmp_lte, // .cmp_lte,
operand, // operand,
item_last, // item_last,
); // );
const range_ok = try case_block.addBinOp( // const range_ok = try case_block.addBinOp(
.bool_and, // .bool_and,
range_first_ok, // range_first_ok,
range_last_ok, // range_last_ok,
); // );
if (any_ok) |some| { // if (any_ok) |some| {
any_ok = try case_block.addBinOp(.bool_or, some, range_ok); // any_ok = try case_block.addBinOp(.bool_or, some, range_ok);
} else { // } else {
any_ok = range_ok; // any_ok = range_ok;
} // }
} // }
const new_condbr = try sema.arena.create(Inst.CondBr); // const new_condbr = try sema.arena.create(Inst.CondBr);
new_condbr.* = .{ // new_condbr.* = .{
.base = .{ // .base = .{
.tag = .condbr, // .tag = .condbr,
.ty = Type.initTag(.noreturn), // .ty = Type.initTag(.noreturn),
.src = src, // .src = src,
}, // },
.condition = any_ok.?, // .condition = any_ok.?,
.then_body = undefined, // .then_body = undefined,
.else_body = undefined, // .else_body = undefined,
}; // };
try case_block.instructions.append(gpa, &new_condbr.base); // try case_block.instructions.append(gpa, &new_condbr.base);
const cond_body: Body = .{ // const cond_body: Body = .{
.instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items), // .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
}; // };
case_block.instructions.shrinkRetainingCapacity(0); // case_block.instructions.shrinkRetainingCapacity(0);
const body = sema.code.extra[extra_index..][0..body_len]; // const body = sema.code.extra[extra_index..][0..body_len];
extra_index += body_len; // extra_index += body_len;
_ = try sema.analyzeBody(&case_block, body); // _ = try sema.analyzeBody(&case_block, body);
new_condbr.then_body = .{ // new_condbr.then_body = .{
.instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items), // .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
}; // };
if (prev_condbr) |condbr| { // if (prev_condbr) |condbr| {
condbr.else_body = cond_body; // condbr.else_body = cond_body;
} else { // } else {
first_else_body = cond_body; // first_else_body = cond_body;
} // }
prev_condbr = new_condbr; // prev_condbr = new_condbr;
} //}
const final_else_body: Body = blk: { //const final_else_body: Body = blk: {
if (special.body.len != 0) { // if (special.body.len != 0) {
case_block.instructions.shrinkRetainingCapacity(0); // case_block.instructions.shrinkRetainingCapacity(0);
_ = try sema.analyzeBody(&case_block, special.body); // _ = try sema.analyzeBody(&case_block, special.body);
const else_body: Body = .{ // const else_body: Body = .{
.instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items), // .instructions = try sema.arena.dupe(Air.Inst.Index, case_block.instructions.items),
}; // };
if (prev_condbr) |condbr| { // if (prev_condbr) |condbr| {
condbr.else_body = else_body; // condbr.else_body = else_body;
break :blk first_else_body; // break :blk first_else_body;
} else { // } else {
break :blk else_body; // break :blk else_body;
} // }
} else { // } else {
break :blk .{ .instructions = &.{} }; // break :blk .{ .instructions = &.{} };
} // }
}; //};
_ = try child_block.addSwitchBr(src, operand, cases, final_else_body); //_ = try child_block.addSwitchBr(src, operand, cases, final_else_body);
return sema.analyzeBlockBody(block, src, &child_block, merges); //return sema.analyzeBlockBody(block, src, &child_block, merges);
} }
fn resolveSwitchItemVal( fn resolveSwitchItemVal(
@ -4332,16 +4332,17 @@ fn resolveSwitchItemVal(
range_expand: Module.SwitchProngSrc.RangeExpand, range_expand: Module.SwitchProngSrc.RangeExpand,
) CompileError!TypedValue { ) CompileError!TypedValue {
const item = sema.resolveInst(item_ref); const item = sema.resolveInst(item_ref);
const item_ty = sema.typeOf(item);
// Constructing a LazySrcLoc is costly because we only have the switch AST node. // Constructing a LazySrcLoc is costly because we only have the switch AST node.
// Only if we know for sure we need to report a compile error do we resolve the // Only if we know for sure we need to report a compile error do we resolve the
// full source locations. // full source locations.
if (sema.resolveConstValue(block, .unneeded, item)) |val| { if (sema.resolveConstValue(block, .unneeded, item)) |val| {
return TypedValue{ .ty = item.ty, .val = val }; return TypedValue{ .ty = item_ty, .val = val };
} else |err| switch (err) { } else |err| switch (err) {
error.NeededSourceLocation => { error.NeededSourceLocation => {
const src = switch_prong_src.resolve(sema.gpa, block.src_decl, switch_node_offset, range_expand); const src = switch_prong_src.resolve(sema.gpa, block.src_decl, switch_node_offset, range_expand);
return TypedValue{ return TypedValue{
.ty = item.ty, .ty = item_ty,
.val = try sema.resolveConstValue(block, src, item), .val = try sema.resolveConstValue(block, src, item),
}; };
}, },

View file

@ -452,6 +452,43 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}, },
}; };
/// Iterator over the operand-death ("tomb") bits of an AIR instruction that
/// has more operands than fit in the packed per-instruction `tomb_bits`
/// (i.e. more than `Liveness.bpi - 1` operands). Operands are fed one at a
/// time via `feed`, and the instruction is finalized with `finishAir`.
const BigTomb = struct {
/// The code generation context; owns the branch stack and liveness data.
function: *Self,
/// The AIR instruction whose operands are being walked.
inst: Air.Inst.Index,
/// Packed death bits covering the first `Liveness.bpi - 1` operands.
tomb_bits: Liveness.Bpi,
/// Overflow death bits for operands beyond the first `Liveness.bpi - 1`.
/// The `u5` shift in `feed` limits this to 32 extra operands
/// (NOTE(review): matches the stated "more than 35 operands not yet
/// supported" limitation — confirm against Liveness).
big_tomb_bits: u32,
/// Index of the next operand slot `feed` will examine.
bit_index: usize,
/// Consumes the next operand slot. Constants (refs below
/// `typed_value_map.len`) are skipped. Otherwise, if the corresponding
/// tomb bit says the operand dies at this instruction, its death is
/// registered with the function.
fn feed(bt: *BigTomb, op_ref: Air.Inst.Ref) void {
const this_bit_index = bt.bit_index;
bt.bit_index += 1;
const op_int = @enumToInt(op_ref);
// Refs below typed_value_map.len denote interned constants, which have
// no liveness to track.
if (op_int < Air.Inst.Ref.typed_value_map.len) return;
const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len);
if (this_bit_index < Liveness.bpi - 1) {
// Operand's death bit lives in the packed per-instruction bits.
const dies = @truncate(u1, bt.tomb_bits >> @intCast(Liveness.OperandInt, this_bit_index)) != 0;
if (!dies) return;
} else {
// Operand's death bit lives in the 32-bit overflow word.
const big_bit_index = @intCast(u5, this_bit_index - (Liveness.bpi - 1));
const dies = @truncate(u1, bt.big_tomb_bits >> big_bit_index) != 0;
if (!dies) return;
}
bt.function.processDeath(op_index);
}
/// Finalizes the instruction: if its result is used, records `result` as
/// the instruction's value in the innermost branch's table, then performs
/// the shared end-of-instruction bookkeeping.
fn finishAir(bt: *BigTomb, result: MCValue) void {
const is_used = !bt.function.liveness.isUnused(bt.inst);
if (is_used) {
log.debug("{} => {}", .{ bt.inst, result });
const branch = &bt.function.branch_stack.items[bt.function.branch_stack.items.len - 1];
branch.inst_table.putAssumeCapacityNoClobber(bt.inst, result);
}
bt.function.finishAirBookkeeping();
}
};
const Self = @This(); const Self = @This();
fn generate( fn generate(
@ -921,8 +958,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (!dies) continue; if (!dies) continue;
const op_int = @enumToInt(op); const op_int = @enumToInt(op);
if (op_int < Air.Inst.Ref.typed_value_map.len) continue; if (op_int < Air.Inst.Ref.typed_value_map.len) continue;
const operand: Air.Inst.Index = op_int - @intCast(u32, Air.Inst.Ref.typed_value_map.len); const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len);
self.processDeath(operand); self.processDeath(op_index);
} }
const is_used = @truncate(u1, tomb_bits) == 0; const is_used = @truncate(u1, tomb_bits) == 0;
if (is_used) { if (is_used) {
@ -2739,7 +2776,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
std.mem.copy(Air.Inst.Ref, buf[1..], args); std.mem.copy(Air.Inst.Ref, buf[1..], args);
return self.finishAir(inst, result, buf); return self.finishAir(inst, result, buf);
} }
@panic("TODO: codegen for function call with greater than 2 args"); var bt = try self.iterateBigTomb(inst, 1 + args.len);
bt.feed(callee);
for (args) |arg| {
bt.feed(arg);
}
return bt.finishAir(result);
} }
fn airRef(self: *Self, inst: Air.Inst.Index) !void { fn airRef(self: *Self, inst: Air.Inst.Index) !void {
@ -3651,7 +3693,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args); std.mem.copy(Air.Inst.Ref, buf[outputs.len..], args);
return self.finishAir(inst, result, buf); return self.finishAir(inst, result, buf);
} }
@panic("TODO: codegen for asm with greater than 3 args"); var bt = try self.iterateBigTomb(inst, outputs.len + args.len);
for (outputs) |output| {
bt.feed(output);
}
for (args) |arg| {
bt.feed(arg);
}
return bt.finishAir(result);
}
/// Begins big-tomb iteration for `inst`, which has `operand_count` operands.
/// Reserves capacity up front so each subsequent `BigTomb.feed` can record an
/// operand death without failing (+1 accounts for the instruction itself).
fn iterateBigTomb(self: *Self, inst: Air.Inst.Index, operand_count: usize) !BigTomb {
try self.ensureProcessDeathCapacity(operand_count + 1);
return BigTomb{
.function = self,
.inst = inst,
.tomb_bits = self.liveness.getTombBits(inst),
// Instructions without an entry in `special` have no overflow bits set.
.big_tomb_bits = self.liveness.special.get(inst) orelse 0,
.bit_index = 0,
};
// NOTE(review): the doubled brace below is old-side residue from the
// side-by-side diff render this chunk was extracted from.
} }
/// Sets the value without any modifications to register allocation metadata or stack allocation metadata. /// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
@ -4492,7 +4552,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// First section of indexes correspond to a set number of constant values. // First section of indexes correspond to a set number of constant values.
const ref_int = @enumToInt(inst); const ref_int = @enumToInt(inst);
if (ref_int < Air.Inst.Ref.typed_value_map.len) { if (ref_int < Air.Inst.Ref.typed_value_map.len) {
return self.genTypedValue(Air.Inst.Ref.typed_value_map[ref_int]); const tv = Air.Inst.Ref.typed_value_map[ref_int];
if (!tv.ty.hasCodeGenBits()) {
return MCValue{ .none = {} };
}
return self.genTypedValue(tv);
} }
// If the type has no codegen bits, no need to store it. // If the type has no codegen bits, no need to store it.

View file

@ -89,7 +89,7 @@ const Writer = struct {
if (w.liveness.isUnused(inst)) { if (w.liveness.isUnused(inst)) {
try s.writeAll(") unused\n"); try s.writeAll(") unused\n");
} else { } else {
try s.writeAll("\n"); try s.writeAll(")\n");
} }
} }
} }