From ac1a975f9b5a7d939663fa90556a2f038250c531 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 28 Dec 2024 22:51:41 -0500 Subject: [PATCH] x86_64: implement clz and not --- lib/std/Target/Query.zig | 8 +- lib/std/Target/x86.zig | 65 + lib/std/math/big/int.zig | 13 +- lib/std/zig/system/x86.zig | 2 + src/arch/x86_64/CodeGen.zig | 6366 +++++++++++++++++++++++++------- src/arch/x86_64/Encoding.zig | 17 +- src/arch/x86_64/Lower.zig | 30 +- src/arch/x86_64/Mir.zig | 34 +- src/arch/x86_64/bits.zig | 12 +- src/arch/x86_64/encodings.zig | 238 +- src/dev.zig | 1 + src/link/Elf/Atom.zig | 37 +- src/link/MachO/Atom.zig | 12 +- test/behavior/math.zig | 9 +- test/behavior/vector.zig | 2 +- test/behavior/x86_64.zig | 3 +- test/behavior/x86_64/build.zig | 114 + test/behavior/x86_64/math.zig | 763 ++++ tools/update_cpu_features.zig | 138 +- 19 files changed, 6440 insertions(+), 1424 deletions(-) create mode 100644 test/behavior/x86_64/build.zig diff --git a/lib/std/Target/Query.zig b/lib/std/Target/Query.zig index 50db1fed5e..56387c27b3 100644 --- a/lib/std/Target/Query.zig +++ b/lib/std/Target/Query.zig @@ -6,13 +6,13 @@ /// `null` means native. cpu_arch: ?Target.Cpu.Arch = null, -cpu_model: CpuModel = CpuModel.determined_by_arch_os, +cpu_model: CpuModel = .determined_by_arch_os, /// Sparse set of CPU features to add to the set from `cpu_model`. -cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty, +cpu_features_add: Target.Cpu.Feature.Set = .empty, /// Sparse set of CPU features to remove from the set from `cpu_model`. -cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty, +cpu_features_sub: Target.Cpu.Feature.Set = .empty, /// `null` means native. os_tag: ?Target.Os.Tag = null, @@ -38,7 +38,7 @@ abi: ?Target.Abi = null, /// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path /// based on the `os_tag`. 
-dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none, +dynamic_linker: Target.DynamicLinker = .none, /// `null` means default for the cpu/arch/os combo. ofmt: ?Target.ObjectFormat = null, diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig index 86c334afda..76dddb222d 100644 --- a/lib/std/Target/x86.zig +++ b/lib/std/Target/x86.zig @@ -47,6 +47,7 @@ pub const Feature = enum { bmi2, branch_hint, branchfusion, + bsf_bsr_0_clobbers_result, ccmp, cf, cldemote, @@ -167,6 +168,8 @@ pub const Feature = enum { slow_unaligned_mem_32, sm3, sm4, + smap, + smep, soft_float, sse, sse2, @@ -497,6 +500,11 @@ pub const all_features = blk: { .description = "CMP/TEST can be fused with conditional branches", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{ + .llvm_name = null, + .description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.ccmp)] = .{ .llvm_name = "ccmp", .description = "Support conditional cmp & test instructions", @@ -1127,6 +1135,16 @@ pub const all_features = blk: { .avx2, }), }; + result[@intFromEnum(Feature.smap)] = .{ + .llvm_name = null, + .description = "Enable Supervisor Mode Access Prevention", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.smep)] = .{ + .llvm_name = null, + .description = "Enable Supervisor Mode Execution Prevention", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.soft_float)] = .{ .llvm_name = "soft-float", .description = "Use software floating point features", @@ -1371,6 +1389,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -1467,6 +1487,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .uintr, .vaes, @@ -1545,6 +1567,8 @@ pub const cpu 
= struct { .slow_3ops_lea, .sm3, .sm4, + .smap, + .smep, .tuning_fast_imm_vector_shift, .uintr, .vaes, @@ -1783,6 +1807,8 @@ pub const cpu = struct { .sahf, .sbb_dep_breaking, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .x87, @@ -1995,6 +2021,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -2136,6 +2164,8 @@ pub const cpu = struct { .sahf, .sha, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -2195,6 +2225,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -2450,6 +2482,8 @@ pub const cpu = struct { .serialize, .sha, .shstk, + .smap, + .smep, .tsxldtrk, .tuning_fast_imm_vector_shift, .uintr, @@ -2519,6 +2553,8 @@ pub const cpu = struct { .slow_incdec, .slow_lea, .slow_two_mem_ops, + .smap, + .smep, .sse4_2, .use_glm_div_sqrt_costs, .vzeroupper, @@ -2898,6 +2934,7 @@ pub const cpu = struct { .rdrnd, .sahf, .slow_3ops_lea, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -2907,6 +2944,7 @@ pub const cpu = struct { .name = "i386", .llvm_name = "i386", .features = featureSet(&[_]Feature{ + .bsf_bsr_0_clobbers_result, .slow_unaligned_mem_16, .vzeroupper, .x87, @@ -2916,6 +2954,7 @@ pub const cpu = struct { .name = "i486", .llvm_name = "i486", .features = featureSet(&[_]Feature{ + .bsf_bsr_0_clobbers_result, .slow_unaligned_mem_16, .vzeroupper, .x87, @@ -3096,6 +3135,7 @@ pub const cpu = struct { .sahf, .slow_3ops_lea, .slow_unaligned_mem_32, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -3403,6 +3443,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3766,6 +3808,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3831,6 +3875,8 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .smap, + .smep, .tuning_fast_imm_vector_shift, 
.vaes, .vpclmulqdq, @@ -3939,6 +3985,8 @@ pub const cpu = struct { .serialize, .sha, .shstk, + .smap, + .smep, .tsxldtrk, .tuning_fast_imm_vector_shift, .uintr, @@ -4042,6 +4090,7 @@ pub const cpu = struct { .slow_lea, .slow_pmulld, .slow_two_mem_ops, + .smep, .sse4_2, .use_slm_arith_costs, .vzeroupper, @@ -4098,6 +4147,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -4150,6 +4201,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .vzeroupper, .x87, .xsavec, @@ -4305,6 +4358,8 @@ pub const cpu = struct { .sahf, .sha, .shstk, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -4574,6 +4629,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .x87, @@ -4629,6 +4686,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .wbnoinvd, @@ -4686,6 +4745,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, @@ -4757,6 +4818,8 @@ pub const cpu = struct { .sha, .shstk, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, @@ -4833,6 +4896,8 @@ pub const cpu = struct { .sha, .shstk, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 691ae02280..98d37d8994 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -2520,12 +2520,13 @@ pub const Const = struct { return order(a, b) == .eq; } + /// Returns the number of leading zeros in twos-complement form. pub fn clz(a: Const, bits: Limb) Limb { - // Limbs are stored in little-endian order but we need - // to iterate big-endian. + // Limbs are stored in little-endian order but we need to iterate big-endian. 
+ if (!a.positive) return 0; var total_limb_lz: Limb = 0; var i: usize = a.limbs.len; - const bits_per_limb = @sizeOf(Limb) * 8; + const bits_per_limb = @bitSizeOf(Limb); while (i != 0) { i -= 1; const limb = a.limbs[i]; @@ -2537,13 +2538,15 @@ pub const Const = struct { return total_limb_lz + bits - total_limb_bits; } + /// Returns the number of trailing zeros in twos-complement form. pub fn ctz(a: Const, bits: Limb) Limb { - // Limbs are stored in little-endian order. + // Limbs are stored in little-endian order. Converting a negative number to twos-complement + // flips all bits above the lowest set bit, which does not affect the trailing zero count. var result: Limb = 0; for (a.limbs) |limb| { const limb_tz = @ctz(limb); result += limb_tz; - if (limb_tz != @sizeOf(Limb) * 8) break; + if (limb_tz != @bitSizeOf(Limb)) break; } return @min(result, bits); } diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig index 7bd1148e13..2737c67d0c 100644 --- a/lib/std/zig/system/x86.zig +++ b/lib/std/zig/system/x86.zig @@ -369,6 +369,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .bmi, bit(leaf.ebx, 3)); // AVX2 is only supported if we have the OS save support from AVX. 
setFeature(cpu, .avx2, bit(leaf.ebx, 5) and has_avx_save); + setFeature(cpu, .smep, bit(leaf.ebx, 7)); setFeature(cpu, .bmi2, bit(leaf.ebx, 8)); setFeature(cpu, .invpcid, bit(leaf.ebx, 10)); setFeature(cpu, .rtm, bit(leaf.ebx, 11)); @@ -377,6 +378,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save); setFeature(cpu, .rdseed, bit(leaf.ebx, 18)); setFeature(cpu, .adx, bit(leaf.ebx, 19)); + setFeature(cpu, .smap, bit(leaf.ebx, 20)); setFeature(cpu, .avx512ifma, bit(leaf.ebx, 21) and has_avx512_save); setFeature(cpu, .clflushopt, bit(leaf.ebx, 23)); setFeature(cpu, .clwb, bit(leaf.ebx, 24)); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 6f3ec618fe..d2d1fedb6f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1390,7 +1390,7 @@ fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { /// A `cc` of `.z_and_np` clobbers `reg2`! fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2: Register) !void { - _ = try self.addInst(.{ + if (self.hasFeature(.cmov)) _ = try self.addInst(.{ .tag = switch (cc) { else => .cmov, .z_and_np, .nz_or_p => .pseudo, @@ -1408,12 +1408,16 @@ fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2 .r1 = reg1, .r2 = reg2, } }, - }); + }) else { + const reloc = try self.asmJccReloc(cc.negate(), undefined); + try self.asmRegisterRegister(.{ ._, .mov }, reg1, reg2); + self.performReloc(reloc); + } } /// A `cc` of `.z_and_np` is not supported by this encoding! 
fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memory) !void { - _ = try self.addInst(.{ + if (self.hasFeature(.cmov)) _ = try self.addInst(.{ .tag = switch (cc) { else => .cmov, .z_and_np => unreachable, @@ -1433,7 +1437,11 @@ fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memo .r1 = reg, .payload = try self.addExtra(Mir.Memory.encode(m)), } }, - }); + }) else { + const reloc = try self.asmJccReloc(cc.negate(), undefined); + try self.asmRegisterMemory(.{ ._, .mov }, reg, m); + self.performReloc(reloc); + } } fn asmSetccRegister(self: *CodeGen, cc: Condition, reg: Register) !void { @@ -2319,6 +2327,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { + @setEvalBranchQuota(1_600); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2354,9 +2363,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off - .not, - => |air_tag| try cg.airUnOp(inst, air_tag), - .add, .add_wrap, .sub, @@ -2434,7 +2440,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .memset_safe => try cg.airMemset(inst, true), .set_union_tag => try cg.airSetUnionTag(inst), .get_union_tag => try cg.airGetUnionTag(inst), - .clz => try cg.airClz(inst), .ctz => try cg.airCtz(inst), .popcount => try cg.airPopCount(inst), .byte_swap => try cg.airByteSwap(inst), @@ -2525,146 +2530,156 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{ - .required_features = .{ .avx2, null }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ 
.mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, } }, }, .{ - 
.required_features = .{ .sse, null }, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm8 } }, - .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm8 } }, + .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ 
.src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm16 } }, - .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm16 } }, + .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm32 } }, - .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm32 } }, + .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = 
.{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .simm32 } }, - .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .simm32 } }, + .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2677,16 +2692,21 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2699,16 +2719,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2721,16 +2746,21 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .v_dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2743,16 +2773,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null }, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2765,16 +2800,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._ps, .movu, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, ._ps, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._ps, .movu, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2787,15 +2827,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, mir_tag, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, ._q, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2808,16 +2853,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, 
.mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, mir_tag, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, } }, - }) catch |err2| switch (err2) { + }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), @@ -2830,6 +2876,1058 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { if (ops[1].index != res[0].index) try ops[1].die(cg); try res[0].moveTo(inst, cg); }, + .not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .signed_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, 
.not, .dst0w, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0q, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_mm, .none } }, + }, + .dst_temps = .{.{ .rc = .mmx }}, + .each = .{ .once = &.{ + 
.{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + 
.src_constraints = .{ .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ 
.signed_or_exact_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) }, + .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ }, + .{ ._, .v_dqu, .mov, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ }, + .{ ._, .v_dqa, .mov, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, 
.is = .yword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ }, + .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) }, + .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ }, + .{ ._, .v_pd, .movu, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ .@"0:", .v_pd, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ }, + .{ ._, .v_pd, .mova, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = 
.isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) }, + .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp1x, .tmp1x, .tmp1x, ._ }, + .{ .@"0:", .v_, .xor, .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._ }, + .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, 
._ }, + .{ ._, .p_d, .cmpeq, .tmp1x, .tmp1x, ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp1x, ._, ._ }, + .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memia(.dst0q, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, 
null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .not, .memad(.dst0d, .add_size, -16), ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", 
null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .not, .memad(.dst0q, .add_size, -16), ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + 
.unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .not, .memad(.dst0d, .add_size, -8), ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, 
._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0d, .add_size, -16), .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + 
.each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0d, .add_size, -8), .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, 
.tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, 
.add_size, -8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_mm, .none } }, + }, + .dst_temps = .{.{ .rc = .mmx }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + 
.unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, 
.dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) }, + .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ 
.unsigned_int_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .xor, .tmp2q, .leaia(.qword, .tmp1, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_src0_size), .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .leaia(.dword, .tmp1, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, 
.mov, .memia(.dst0d, .tmp0, .add_src0_size), .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(ty_op.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + if (ops[0].index != res[0].index) try ops[0].die(cg); + try res[0].moveTo(inst, cg); + }, + .block => if (use_old) try cg.airBlock(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.Block, ty_pl.payload); @@ -2880,6 +3978,2675 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_never_tail => try cg.airCall(inst, .never_tail), .call_never_inline => try cg.airCall(inst, .never_inline), + .clz => |air_tag| if (use_old) try cg.airClz(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .add, .dst0b, .si(1), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .inc, .dst0b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, 
+ .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .si(1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 16 }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .exact_int = 16 }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any }, + 
.patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + 
.src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, 
.{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, 
._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ 
.unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, 
+ }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = 
&.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = 
.general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ }, + .{ 
._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + 
.dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, 
._ }, + .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ 
.signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), 
._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ }, 
+ .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ }, + .{ ._, ._, 
.mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .dst0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ 
._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, 
+ .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each 
= .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, 
.lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, 
.j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._, 
.mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = 
.general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + 
.extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ 
.rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, 
.movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, 
.sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, 
.@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, 
+ .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, 
.add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, 
.movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = 
.u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ 
.{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, 
.@"0b", ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, 
._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, 
.add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, 
.sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + 
.unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, 
.any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ 
._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, 
.memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ 
.of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + 
}, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3b, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), 
._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, 
.lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3b, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + 
.dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = 
.general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, 
.none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + 
.{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, 
.lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(ty_op.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + if (ops[0].index != res[0].index) try 
ops[0].die(cg); + try res[0].moveTo(inst, cg); + }, + .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; @@ -2899,12 +6666,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2919,12 +6686,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2939,12 +6706,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + 
.required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2959,12 +6726,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2979,12 +6746,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2999,12 +6766,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -3019,12 +6786,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -3039,12 +6806,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ 
.src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -3059,12 +6826,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3079,12 +6846,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3099,12 +6866,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, 
.patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3119,12 +6886,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3139,12 +6906,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3159,12 +6926,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, 
}, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3179,12 +6946,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3203,16 +6970,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm8 } }, - .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm8 } }, + .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ 
.to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3228,16 +6995,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm16 } }, - .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm16 } }, + .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3253,16 +7020,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm32 } }, - .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm32 } }, + .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 
0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3274,21 +7041,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .simm32 } }, - .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .simm32 } }, + .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3304,7 +7071,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, @@ -3314,29 +7080,30 @@ fn 
genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .not, .tmp1p, ._, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3349,21 +7116,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, 
._ }, .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3371,13 +7139,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3390,10 +7158,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3401,11 +7170,11 @@ fn genBody(cg: *CodeGen, body: 
[]const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3414,13 +7183,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3433,21 +7202,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), 
.tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3455,13 +7225,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3474,10 +7244,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, @@ -3486,49 +7257,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, 
.@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, 
.j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3541,21 +7312,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, @@ -3563,13 +7335,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, 
.tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3582,10 +7354,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, @@ -3593,11 +7366,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, @@ -3606,13 +7379,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, 
.tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3625,10 +7398,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3637,49 +7411,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, 
.sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3692,10 +7466,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3704,49 +7479,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + 
.{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3759,21 +7534,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ 
._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, @@ -3781,13 +7557,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3800,10 +7576,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, @@ -3811,11 +7588,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, 
- .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, @@ -3824,13 +7601,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3843,10 +7620,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3855,49 +7633,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, 
._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, 
.@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse4_1, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3910,10 +7688,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3922,49 +7701,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ 
._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse, .mmx }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ 
-3977,21 +7756,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, @@ -3999,13 +7779,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse, .mmx }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4018,10 +7798,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .kind = .{ .rc = .mmx } }, }, .dst_temps = .{.mem}, + .clobbers = 
.{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4032,22 +7813,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4055,28 +7836,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, - .{ ._, ._, .xor, .tmp4b, .i(0b1111), ._, 
._ }, + .{ ._, ._, .xor, .tmp4b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse, .mmx }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4089,10 +7870,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .kind = .{ .rc = .mmx } }, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4104,22 +7886,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, 
.{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4128,29 +7910,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ }, .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, - .{ ._, ._, .xor, .tmp4b, .i(0b11), ._, ._ }, + .{ ._, ._, .xor, .tmp4b, .si(0b11), ._, ._ }, .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, 
.memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .slow_incdec, null }, .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4163,51 +7944,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = 
.isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4220,51 +7973,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, 
.si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4277,178 +8002,88 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), 
.set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .byte }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, 
._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - 
.{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(1), 
._, ._ }, .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .dst_constraints = .{.{ .bool_vec = .byte }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, - .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, - .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, - .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, - .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, .dst_constraints = .{.{ .bool_vec = .dword 
}}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4461,9 +8096,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, @@ -4471,43 +8107,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, 
.tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1d, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4520,9 +8126,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, @@ -4530,43 +8137,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = 
.u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4579,9 +8156,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, @@ -4589,43 +8167,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, 
.tmp0p, .si(4), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4638,9 +8187,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ 
.@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, @@ -4648,115 +8198,50 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ 
.type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ }, .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .dst_constraints = .{.{ .bool_vec = .dword }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = 
.general_purpose } }, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, - .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, - .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, - .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, - .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4769,9 +8254,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, @@ -4779,44 +8265,14 @@ fn genBody(cg: 
*CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4829,53 +8285,24 @@ fn 
genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, 
._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4888,9 +8315,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, @@ -4898,44 +8326,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - 
.dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4948,9 +8346,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, @@ -4958,42 +8357,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, 
.{ - .required_features = .{ .@"64bit", null }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, @@ -5007,66 +8376,30 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, .{ ._, ._, 
.xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ }, .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, - .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, - .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, - .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .@"test", 
.tmp3p, .tmp3p, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, - .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5079,8 +8412,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, @@ -5089,62 +8423,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, - .{ ._, ._, .add, .tmp1d, .i(1), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, + .{ ._, ._, .add, .tmp1d, .si(1), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, + .{ 
._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, - .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .reg = .ecx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.mem}, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, - .{ ._, ._, .inc, .tmp1d, ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, - .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, - .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"1:", ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, - .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0p, .tmp3), 
.tmp2p, ._, ._ }, } }, } }, - }) catch |err2| switch (err2) { + }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(extra.lhs).fmt(pt), @@ -5175,9 +8470,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .cmp_lte, .cmp_lte_optimized => .lte, .cmp_gte, .cmp_gte_optimized => .gte, .cmp_gt, .cmp_gt_optimized => .gt, - }) else { + }) else fallback: { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); + if (scalar_ty.isRuntimeFloat()) break :fallback try cg.airCmp(inst, switch (air_tag) { + else => unreachable, + .cmp_lt, .cmp_lt_optimized => .lt, + .cmp_lte, .cmp_lte_optimized => .lte, + .cmp_gte, .cmp_gte_optimized => .gte, + .cmp_gt, .cmp_gt_optimized => .gt, + }); const signedness = if (scalar_ty.isAbiInt(zcu)) scalar_ty.intInfo(zcu).signedness else @@ -5205,11 +8507,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, @@ -5217,12 +8519,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, - .{ .src = .{ .gpr, .imm8 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm8 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true 
}, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, @@ -5230,11 +8532,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, @@ -5242,12 +8544,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mem, .imm16 } }, - .{ .src = .{ .gpr, .imm16 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm16 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, @@ -5255,11 +8557,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, @@ 
-5267,45 +8569,50 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mem, .imm32 } }, - .{ .src = .{ .gpr, .imm32 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm32 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mem, .simm32 } }, - .{ .src = .{ .gpr, .simm32 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, + .{ .src = .{ .to_mem, .to_mem }, .commute = switch (cc) { + 
else => unreachable, + .l, .ge, .b, .ae => .{ 0, 0 }, + .le, .g, .be, .a => .{ 0, 1 }, + } }, }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, @@ -5315,17 +8622,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._r, .sh, .tmp1b, .i(1), ._, ._ }, + .{ .@"0:", ._r, .sh, .tmp1b, .si(1), ._, ._ }, .{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._c, .set, .tmp1b, ._, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, .fromCondition(switch (cc) { + else => unreachable, + .l, .ge, .b, .ae => cc, + .le, .g, .be, .a => cc.commute(), + }), .set, .dst0b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, } }, @@ -5342,13 +8653,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { if (ops[1].index != res[0].index) try ops[1].die(cg); try res[0].moveTo(inst, cg); }, - .cmp_eq, .cmp_eq_optimized, .cmp_neq, .cmp_neq_optimized => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { + .cmp_eq, + .cmp_eq_optimized, + .cmp_neq, + .cmp_neq_optimized, + => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .eq, .cmp_neq, .cmp_neq_optimized => .neq, }) else fallback: { const bin_op = air_datas[@intFromEnum(inst)].bin_op; - if (ip.isOptionalType(cg.typeOf(bin_op.lhs).toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { + const scalar_ty = 
cg.typeOf(bin_op.lhs).scalarType(zcu); + if (scalar_ty.isRuntimeFloat() or ip.isOptionalType(scalar_ty.toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .eq, .cmp_neq, .cmp_neq_optimized => .neq, @@ -5362,14 +8678,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, @@ -5379,19 +8694,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ }, .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, @@ -5401,19 +8716,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + 
.clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ }, .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, @@ -5423,33 +8738,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ }, .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, 
.mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, @@ -5459,22 +8774,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, .mmx }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, @@ -5484,26 +8799,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ }, } }, }, .{ .src_constraints 
= .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .imm8 } }, - .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm8 } }, + .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, @@ -5512,14 +8828,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mem, .imm16 } }, .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .imm16 } }, - .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm16 } }, + .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, @@ -5528,36 +8844,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mem, .imm32 } }, .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .imm32 } }, - .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm32 } }, + .{ .src = .{ .imm32, 
.to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mem, .simm32 } }, .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .simm32 } }, - .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5570,18 +8890,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, .vp_, .xor, .tmp2y, 
.tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ }, + .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ }, + .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5594,18 +8947,47 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, 
.tmp1y, ._ }, + .{ .@"0:", .v_pd, .movu, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ }, + .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_pd, .mova, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ }, + .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, .{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5618,18 +9000,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ }, .{ 
.@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null }, + .required_features = .{ .sse4_1, null, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5642,18 +9025,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, + .required_features = .{ .sse2, null, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5666,21 +9050,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, - .{ ._, ._, 
.add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, - .{ ._, ._, .cmp, .tmp0d, .i(0xffff), ._, ._ }, + .{ ._, ._, .cmp, .tmp0d, .si(0xffff), ._, ._ }, } }, }, .{ - .required_features = .{ .sse, .mmx }, + .required_features = .{ .sse, .mmx, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5693,18 +9078,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, .{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, - .{ ._, ._, .cmp, .tmp0d, .i(0xff), ._, ._ }, + .{ ._, ._, .cmp, .tmp0d, .si(0xff), ._, ._ }, } }, }, .{ .patterns = &.{ @@ -5719,13 +9105,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ }, 
- .{ ._, ._, .add, .tmp0p, .a(.tmp2, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp2, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ }, } }, @@ -6453,11 +9840,8 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class { else => .sse, }, .vector => switch (ty.childType(zcu).toIntern()) { - .bool_type, .u1_type => .general_purpose, - else => if (ty.isAbiInt(zcu) and ty.intInfo(zcu).bits == 1) - .general_purpose - else - .sse, + .bool_type => .general_purpose, + else => .sse, }, else => .general_purpose, }; @@ -8383,32 +11767,54 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const lhs_mcv = try self.resolveInst(bin_op.lhs); const rhs_mcv = try self.resolveInst(bin_op.rhs); - const mat_lhs_mcv = switch (lhs_mcv) { - .load_symbol => mat_lhs_mcv: { + const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) { + .register => |lhs_reg| switch (lhs_reg.class()) { + else => lhs_mcv, + .sse => { + const mat_lhs_mcv: MCValue = .{ + .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), + }; + try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{}); + break :mat_lhs_mcv mat_lhs_mcv; + }, + }, + .load_symbol => { // TODO clean this up! 
const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => lhs_mcv, }; - const mat_lhs_lock = switch (mat_lhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, + const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) { + .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs), + .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, + else => @splat(null), }; - defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - const mat_rhs_mcv = switch (rhs_mcv) { - .load_symbol => mat_rhs_mcv: { + defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); + const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) { + .register => |rhs_reg| switch (rhs_reg.class()) { + else => rhs_mcv, + .sse => { + const mat_rhs_mcv: MCValue = .{ + .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), + }; + try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{}); + break :mat_rhs_mcv mat_rhs_mcv; + }, + }, + .load_symbol => { // TODO clean this up! 
const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, }; - const mat_rhs_lock = switch (mat_rhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, + const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) { + .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs), + .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, + else => @splat(null), }; - defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); + defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, @@ -10003,7 +13409,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { } }, }, .u(0)); _ = try self.asmJccReloc(.e, loop); - try self.asmRegisterMemory(.{ ._, .bsr }, dst_reg.to64(), .{ + try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{ .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, @@ -10080,8 +13486,8 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(wide_lock); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .bsr }, .u16, dst_mcv, .{ .register = wide_reg }); - } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv); try self.asmCmovccRegisterRegister( .z, @@ -10103,7 +13509,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { try self.truncateRegister(src_ty, wide_reg); try self.genBinOpMir( - .{ ._, .bsr }, + .{ ._r, .bs }, if (src_bits <= 8) .u16 else src_ty, dst_mcv, .{ .register = wide_reg }, @@ -10200,7 +13606,7 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { } }, 
}, .u(0)); _ = try self.asmJccReloc(.e, loop); - try self.asmRegisterMemory(.{ ._, .bsf }, dst_reg.to64(), .{ + try self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{ .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, @@ -10280,8 +13686,8 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(wide_lock); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .bsf }, wide_ty, dst_mcv, .{ .register = wide_reg }); - } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2); try self.asmCmovccRegisterRegister( @@ -12975,7 +16381,18 @@ fn genShiftBinOp( const rcx_lock = self.register_manager.lockReg(.rcx); defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock); - const lhs_lock = switch (lhs_mcv) { + const mat_lhs_mcv: MCValue, const can_reuse_lhs = switch (lhs_mcv) { + .register => |lhs_reg| switch (lhs_reg.class()) { + .general_purpose => .{ lhs_mcv, true }, + else => lhs: { + const mat_lhs_mcv = try self.allocTempRegOrMem(lhs_ty, true); + try self.genCopy(lhs_ty, mat_lhs_mcv, lhs_mcv, .{}); + break :lhs .{ mat_lhs_mcv, false }; + }, + }, + else => .{ lhs_mcv, true }, + }; + const lhs_lock = switch (mat_lhs_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, }; @@ -12988,12 +16405,12 @@ fn genShiftBinOp( defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); const dst_mcv: MCValue = dst: { - if (maybe_inst) |inst| { + if (can_reuse_lhs) if (maybe_inst) |inst| { const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv; - } + if (self.reuseOperand(inst, bin_op.lhs, 0, mat_lhs_mcv)) break :dst 
mat_lhs_mcv; + }; const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{}); + try self.genCopy(lhs_ty, dst_mcv, mat_lhs_mcv, .{}); break :dst dst_mcv; }; @@ -18337,12 +21754,28 @@ const MoveStrategy = union(enum) { try self.asmRegister(.{ .f_, .ld }, src_reg); try self.asmMemory(.{ .f_p, .st }, dst_mem); }, - .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate( - ie.extract, - dst_mem, - src_reg, - .u(0), - ), + .insert_extract, .vex_insert_extract => |ie| if (ie.extract[0] != .p_w or self.hasFeature(.sse4_1)) + try self.asmMemoryRegisterImmediate(ie.extract, dst_mem, src_reg, .u(0)) + else if (self.hasFeature(.sse2)) { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + try self.asmRegisterRegisterImmediate(ie.extract, tmp_reg.to32(), src_reg.to128(), .u(0)); + try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16()); + } else { + const tmp_frame_index = try self.allocFrameIndex(.init(.{ + .size = 16, + .alignment = .@"16", + })); + try self.asmMemoryRegister(.{ ._ps, .mova }, .{ + .base = .{ .frame = tmp_frame_index }, + .mod = .{ .rm = .{ .size = .xword } }, + }, src_reg.to128()); + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg.to16(), .{ + .base = .{ .frame = tmp_frame_index }, + .mod = .{ .rm = .{ .size = .word } }, + }); + try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16()); + }, } } }; @@ -18400,8 +21833,10 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .{ ._ss, .mov } }, 5...8 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } + else if (self.hasFeature(.sse2)) + .{ ._sd, .mov } else - .{ ._sd, .mov } }, + .{ ._ps, .movl } }, 9...16 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_pd, if (aligned) .mova else .movu } else if (self.hasFeature(.sse2)) @@ -18427,8 
+21862,10 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .{ ._ss, .mov } }, 64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } + else if (self.hasFeature(.sse2)) + .{ ._sd, .mov } else - .{ ._sd, .mov } }, + .{ ._ps, .movl } }, 128 => return .{ .move = if (self.hasFeature(.avx)) .{ if (aligned) .v_dqa else .v_dqu, .mov } else if (self.hasFeature(.sse2)) @@ -18623,6 +22060,30 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C }, opts), inline .register_pair, .register_triple, .register_quadruple => |dst_regs| { const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) { + .register => |src_reg| switch (dst_regs[0].class()) { + .general_purpose => switch (src_reg.class()) { + else => unreachable, + .sse => if (ty.abiSize(pt.zcu) <= 16) { + if (self.hasFeature(.avx)) { + try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128()); + try self.asmRegisterRegisterImmediate(.{ .vp_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1)); + } else if (self.hasFeature(.sse4_1)) { + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128()); + try self.asmRegisterRegisterImmediate(.{ .p_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1)); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128()); + try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128()); + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), tmp_reg.to128()); // movhlps left the high qword of src_reg in the low qword of tmp_reg + } + return; + } else unreachable, + }, + else => unreachable, + }, .register_pair, .memory, .indirect, .load_frame => null, .load_symbol, .load_direct, .load_got, .load_tlv => src: { const src_addr_reg = @@
-18863,7 +22324,39 @@ fn genSetReg( inline .register_pair, .register_triple, .register_quadruple, - => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), + => |src_regs| switch (dst_reg.class()) { + .general_purpose => switch (src_regs[0].class()) { + .general_purpose => try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), + else => unreachable, + }, + .sse => switch (src_regs[0].class()) { + .general_purpose => if (abi_size <= 16) { + if (self.hasFeature(.avx)) { + try self.asmRegisterRegister(.{ .v_q, .mov }, dst_reg.to128(), src_regs[0].to64()); + try self.asmRegisterRegisterRegisterImmediate( + .{ .vp_q, .insr }, + dst_reg.to128(), + dst_reg.to128(), + src_regs[1].to64(), + .u(1), + ); + } else if (self.hasFeature(.sse4_1)) { + try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64()); + try self.asmRegisterRegisterImmediate(.{ .p_q, .insr }, dst_reg.to128(), src_regs[1].to64(), .u(1)); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64()); + try self.asmRegisterRegister(.{ ._q, .mov }, tmp_reg.to128(), src_regs[1].to64()); + try self.asmRegisterRegister(.{ ._ps, .movlh }, dst_reg.to128(), tmp_reg.to128()); + } + } else unreachable, + else => unreachable, + }, + else => unreachable, + }, .register_offset, .indirect, .load_frame, @@ -23517,8 +27010,6 @@ fn promoteVarArg(self: *CodeGen, ty: Type) Type { } } -// ====================================== rewrite starts here ====================================== - const Temp = struct { index: Air.Inst.Index, @@ -24311,13 +27802,13 @@ const Select = struct { } const Case = struct { - required_features: [2]?std.Target.x86.Feature = @splat(null), + required_features: [4]?std.Target.x86.Feature = @splat(null), 
dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any), src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any), patterns: []const Select.Pattern, - clobbers: struct { eflags: bool = false } = .{}, extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused), dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused), + clobbers: struct { eflags: bool = false } = .{}, each: union(enum) { once: []const Instruction, }, @@ -24327,9 +27818,32 @@ const Select = struct { any, any_bool_vec, any_int, + any_signed_int, any_float, bool_vec: Memory.Size, + vec: Memory.Size, + signed_int_vec: Memory.Size, + signed_int_or_full_vec: Memory.Size, + unsigned_int_vec: Memory.Size, + int_or_vec: Memory.Size, + exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size }, int: Memory.Size, + scalar_int: Memory.Size, + scalar_signed_int: Memory.Size, + scalar_unsigned_int: Memory.Size, + scalar_remainder_int: struct { of: Memory.Size, is: Memory.Size }, + exact_int: u16, + exact_signed_int: u16, + exact_unsigned_int: u16, + signed_or_exact_int: Memory.Size, + unsigned_or_exact_int: Memory.Size, + po2_int: Memory.Size, + signed_po2_int: Memory.Size, + unsigned_po2_or_exact_int: Memory.Size, + remainder_int: struct { of: Memory.Size, is: Memory.Size }, + exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, + signed_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, + unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, signed_int: Memory.Size, unsigned_int: Memory.Size, @@ -24338,30 +27852,183 @@ const Select = struct { switch (constraint) { .any => return true, .any_bool_vec => return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type, - .any_int => { - 
const scalar_ty = ty.scalarType(zcu); - return scalar_ty.isAbiInt(zcu) or scalar_ty.isPtrAtRuntime(zcu); - }, + .any_int => return ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu), + .any_signed_int => return ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed, .any_float => return ty.scalarType(zcu).isRuntimeFloat(), - .bool_vec => |size| return ty.isVector(zcu) and - ty.scalarType(zcu).toIntern() == .bool_type and ty.vectorLen(zcu) <= size.bitSize(cg.target), - .int => |size| { + .bool_vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and + size.bitSize(cg.target) >= ty.vectorLen(zcu), + .vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and + size.bitSize(cg.target) >= ty.abiSize(zcu), + .signed_int_vec => |size| { + if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false; const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target); - return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(cg.target); + return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .signed; + }, + .signed_int_or_full_vec => |size| { + if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false; + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return true; + if (!scalar_ty.isAbiInt(zcu)) return false; + const scalar_int_info = scalar_ty.intInfo(zcu); + return switch (scalar_int_info.signedness) { + .signed => true, + .unsigned => scalar_int_info.bits >= 8 and std.math.isPowerOfTwo(scalar_int_info.bits), + }; + }, + .unsigned_int_vec => |size| { + if (!ty.isVector(zcu) or size.bitSize(cg.target) < ty.bitSize(zcu)) return false; + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return true; + return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == 
.unsigned; + }, + .int_or_vec => |size| { + if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and + size.bitSize(cg.target) >= 8 * ty.abiSize(zcu); + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits; + }, + .exact_remainder_int_or_vec => |of_is| { + if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and + of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1; + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .int => |size| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits; + }, + .scalar_int => |size| { + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return scalar_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= scalar_ty.intInfo(zcu).bits; + }, + .scalar_signed_int => |size| { + const scalar_ty = ty.scalarType(zcu); + if (!scalar_ty.isAbiInt(zcu)) return false; + const scalar_int_info = scalar_ty.intInfo(zcu); + return scalar_int_info.signedness == .signed and size.bitSize(cg.target) >= scalar_int_info.bits; + }, + .scalar_unsigned_int => |size| { + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!scalar_ty.isAbiInt(zcu)) return false; + const scalar_int_info = scalar_ty.intInfo(zcu); + return scalar_int_info.signedness == .unsigned and 
size.bitSize(cg.target) >= scalar_int_info.bits; + }, + .scalar_remainder_int => |of_is| { + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!scalar_ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) >= (scalar_ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .exact_int => |bit_size| { + if (ty.toIntern() == .bool_type) return bit_size == 1; + if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth(); + return ty.isAbiInt(zcu) and bit_size == ty.intInfo(zcu).bits; + }, + .exact_signed_int => |bit_size| { + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .signed and bit_size == int_info.bits; + }, + .exact_unsigned_int => |bit_size| { + if (ty.toIntern() == .bool_type) return bit_size == 1; + if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .unsigned and bit_size == int_info.bits; + }, + .signed_or_exact_int => |size| { + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) == cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => size.bitSize(cg.target) >= int_info.bits, + .unsigned => size.bitSize(cg.target) == int_info.bits, + }; + }, + .unsigned_or_exact_int => |size| { + if (ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu)) return true; + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => size.bitSize(cg.target) == int_info.bits, + .unsigned => size.bitSize(cg.target) >= int_info.bits, + }; + }, + .po2_int => |size| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return 
size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const bit_size = ty.intInfo(zcu).bits; + return std.math.isPowerOfTwo(bit_size) and size.bitSize(cg.target) >= bit_size; + }, + .signed_po2_int => |size| { + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .signed and std.math.isPowerOfTwo(int_info.bits) and + size.bitSize(cg.target) >= int_info.bits; + }, + .unsigned_po2_or_exact_int => |size| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => size.bitSize(cg.target) == int_info.bits, + .unsigned => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits, + }; + }, + .remainder_int => |of_is| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) >= (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .exact_remainder_int => |of_is| { + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .signed_or_exact_remainder_int => |of_is| { + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + .unsigned => 
of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + }; + }, + .unsigned_or_exact_remainder_int => |of_is| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + .unsigned => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + }; }, .signed_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (!scalar_ty.isAbiInt(zcu)) return false; - const info = scalar_ty.intInfo(zcu); - return info.signedness == .signed and info.bits <= size.bitSize(cg.target); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .signed and size.bitSize(cg.target) >= int_info.bits; }, .unsigned_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target); - if (!scalar_ty.isAbiInt(zcu)) return false; - const info = scalar_ty.intInfo(zcu); - return info.signedness == .unsigned and info.bits <= size.bitSize(cg.target); + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .unsigned and size.bitSize(cg.target) >= int_info.bits; }, } } @@ -24379,97 +28046,107 @@ const Select = struct { imm32, simm32, mem, - mut_mem, to_mem, + mut_mem, + to_mut_mem, gpr, + to_gpr, mut_gpr, + to_mut_gpr, mm, + to_mm, mut_mm, + to_mut_mm, xmm, + to_xmm, mut_xmm, + to_mut_xmm, ymm, + to_ymm, mut_ymm, + to_mut_ymm, fn matches(src: Src, temp: Temp, cg: *CodeGen) bool { - 
switch (src) { + return switch (src) { .none => unreachable, - .any => return true, - .imm8 => return switch (temp.tracking(cg).short) { + .any => true, + .imm8 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(u8, imm) != null, else => false, }, - .imm16 => return switch (temp.tracking(cg).short) { + .imm16 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(u16, imm) != null, else => false, }, - .imm32 => return switch (temp.tracking(cg).short) { + .imm32 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(u32, imm) != null, else => false, }, - .simm32 => return switch (temp.tracking(cg).short) { + .simm32 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(i32, @as(i64, @bitCast(imm))) != null, else => false, }, - .mem => return temp.tracking(cg).short.isMemory(), - .mut_mem => return temp.isMut(cg) and temp.tracking(cg).short.isMemory(), - .to_mem => return true, - .gpr, .mut_gpr => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size <= 8 and switch (mcv) { - .register => |reg| reg.class() == .general_purpose, - .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and - reg_off.off == 0, - .register_pair, .register_triple, .register_quadruple => false, - else => true, - }; + .mem => temp.tracking(cg).short.isMemory(), + .to_mem, .to_mut_mem => true, + .mut_mem => temp.isMut(cg) and temp.tracking(cg).short.isMemory(), + .gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .general_purpose, + .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0, + else => false, }, - .mm, .mut_mm => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size <= 8 and switch (mcv) { - .register => |reg| reg.class() == .mmx, - .register_offset => |reg_off| 
reg_off.reg.class() == .mmx and - reg_off.off == 0, - else => false, - }; + .mut_gpr => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .general_purpose, + .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0, + else => false, }, - .xmm, .mut_xmm => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size > 8 and abi_size <= 16 and switch (mcv) { - .register => |reg| reg.class() == .sse, - .register_offset => |reg_off| reg_off.reg.class() == .sse and - reg_off.off == 0, - .register_pair, .register_triple, .register_quadruple => false, - else => true, - }; + .to_gpr, .to_mut_gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8, + .mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .mmx, + .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, + else => false, }, - .ymm, .mut_ymm => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size > 16 and abi_size <= 32 and switch (mcv) { - .register => |reg| reg.class() == .sse, - .register_offset => |reg_off| reg_off.reg.class() == .sse and - reg_off.off == 0, - .register_pair, .register_triple, .register_quadruple => false, - else => true, - }; + .mut_mm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .mmx, + .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, + else => false, }, - } + .to_mm, .to_mut_mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8, + .xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + 
.mut_xmm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .to_xmm, .to_mut_xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16, + .ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .mut_ymm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .to_ymm, .to_mut_ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32, + }; } fn convert(src: Src, temp: *Temp, cg: *CodeGen) !bool { return switch (src) { .none => unreachable, .any, .imm8, .imm16, .imm32, .simm32 => false, - .mem, .mut_mem, .to_mem => try temp.toBase(cg), - .gpr => try temp.toRegClass(false, .general_purpose, cg), - .mut_gpr => try temp.toRegClass(true, .general_purpose, cg), - .mm => try temp.toRegClass(false, .mmx, cg), - .mut_mm => try temp.toRegClass(true, .mmx, cg), - .xmm, .ymm => try temp.toRegClass(false, .sse, cg), - .mut_xmm, .mut_ymm => try temp.toRegClass(true, .sse, cg), + .mem, .to_mem, .mut_mem, .to_mut_mem => try temp.toBase(cg), + .gpr, .to_gpr => try temp.toRegClass(false, .general_purpose, cg), + .mut_gpr, .to_mut_gpr => try temp.toRegClass(true, .general_purpose, cg), + .mm, .to_mm => try temp.toRegClass(false, .mmx, cg), + .mut_mm, .to_mut_mm => try temp.toRegClass(true, .mmx, cg), + .xmm, .to_xmm, .ymm, .to_ymm => try temp.toRegClass(false, .sse, cg), + .mut_xmm, .to_mut_xmm, .mut_ymm, .to_mut_ymm => try temp.toRegClass(true, .sse, cg), }; } }; @@ -24489,6 +28166,10 @@ const Select = struct { rc: Register.Class, rc_mask: 
struct { rc: Register.Class, info: MaskInfo }, mem, + smin_mem: Select.Operand.Ref, + smax_mem: Select.Operand.Ref, + umin_mem: Select.Operand.Ref, + umax_mem: Select.Operand.Ref, ref: Select.Operand.Ref, ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo }, @@ -24501,14 +28182,81 @@ const Select = struct { }; fn create(spec: TempSpec, s: *Select) !?Temp { + const cg = s.cg; return switch (spec.kind) { .unused => null, - .any => try s.cg.tempAlloc(spec.type), - .cc => |cc| try s.cg.tempFromValue(spec.type, .{ .eflags = cc }), - .reg => |reg| try s.cg.tempFromValue(spec.type, .{ .register = reg }), - .rc => |rc| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc)), - .rc_mask => |rc_mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)), - .mem => try s.cg.tempAllocMem(spec.type), + .any => try cg.tempAlloc(spec.type), + .cc => |cc| try cg.tempFromValue(spec.type, .{ .eflags = cc }), + .reg => |reg| try cg.tempFromValue(spec.type, .{ .register = reg }), + .rc => |rc| try cg.tempAllocReg(spec.type, regSetForRegClass(rc)), + .rc_mask => |rc_mask| try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)), + .mem => try cg.tempAllocMem(spec.type), + .smin_mem, .smax_mem, .umin_mem, .umax_mem => |ty_ref| { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty = ty_ref.deref(s).typeOf(s.cg); + const vector_len, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) { + else => .{ null, ty }, + .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) }, + }; + const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) { + .bool_type => .{ + scalar_ty, + .fromInterned(switch (spec.kind) { + else => unreachable, + .smin_mem, .umax_mem => .bool_true, + .smax_mem, .umin_mem => .bool_false, + }), + }, + else => { + const scalar_info: InternPool.Key.IntType = if (scalar_ty.isAbiInt(zcu)) + scalar_ty.intInfo(zcu) + else + .{ .signedness = .unsigned, .bits 
= @intCast(scalar_ty.bitSize(zcu)) }; + const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits); + if (scalar_info.bits <= 64) { + const int_val: i64 = switch (spec.kind) { + else => unreachable, + .smin_mem => std.math.minInt(i64), + .smax_mem => std.math.maxInt(i64), + .umin_mem => 0, + .umax_mem => -1, + }; + const shift: u6 = @intCast(64 - scalar_info.bits); + break :res_scalar .{ scalar_int_ty, switch (scalar_info.signedness) { + .signed => try pt.intValue_i64(scalar_int_ty, int_val >> shift), + .unsigned => try pt.intValue_u64(scalar_int_ty, @as(u64, @bitCast(int_val)) >> shift), + } }; + } + var big_int: std.math.big.int.Managed = try .init(cg.gpa); + defer big_int.deinit(); + try big_int.setTwosCompIntLimit(switch (spec.kind) { + else => unreachable, + .smin_mem, .umin_mem => .min, + .smax_mem, .umax_mem => .max, + }, switch (spec.kind) { + else => unreachable, + .smin_mem, .smax_mem => .signed, + .umin_mem, .umax_mem => .unsigned, + }, scalar_info.bits); + try big_int.truncate(&big_int, scalar_info.signedness, scalar_info.bits); + break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) }; + }, + }; + const res_ty, const res_val: Value = if (vector_len) |len| res: { + const vector_ty = try pt.vectorType(.{ + .len = len, + .child = res_scalar_ty.toIntern(), + }); + const vector_val = try pt.intern(.{ .aggregate = .{ + .ty = vector_ty.toIntern(), + .storage = .{ .repeated_elem = res_scalar_val.toIntern() }, + } }); + break :res .{ vector_ty, .fromInterned(vector_val) }; + } else .{ res_scalar_ty, res_scalar_val }; + return try cg.tempFromValue(res_ty, try cg.genTypedValue(res_val)); + }, .ref => |ref| ref.deref(s), .ref_mask => |ref_mask| ref_mask.ref.deref(s), }; @@ -24541,21 +28289,51 @@ const Select = struct { forward_label, ref, simm, + uimm, lea, mem, }; - const Adjust = enum { - none, - add_ptr_size, - sub_ptr_size, - add_ptr_bit_size, - sub_ptr_bit_size, - add_size, - sub_size, - add_len, - 
sub_len, - add_elem_limbs, - sub_elem_limbs, + const Adjust = packed struct(u8) { + factor: i2, + scale: Memory.Scale, + amount: enum(u4) { + none, + ptr_size, + ptr_bit_size, + size, + src0_size, + bit_size, + src0_bit_size, + len, + elem_limbs, + src0_elem_size, + smin, + smax, + umax, + }, + + const none: Adjust = .{ .factor = 0, .scale = .@"1", .amount = .none }; + const sub_ptr_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .ptr_size }; + const add_ptr_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .ptr_bit_size }; + const add_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .size }; + const sub_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .size }; + const add_src0_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_size }; + const sub_src0_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_size }; + const add_2_bit_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .bit_size }; + const add_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .bit_size }; + const sub_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .bit_size }; + const add_src0_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_bit_size }; + const sub_src0_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_bit_size }; + const add_8_len: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .len }; + const add_4_len: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .len }; + const add_3_len: Adjust = .{ .factor = 1, .scale = .@"3", .amount = .len }; + const add_2_len: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .len }; + const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len }; + const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len }; + const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size }; + const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size 
}; + const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs }; + const add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax }; }; const Ref = enum(u4) { tmp0, @@ -24741,15 +28519,24 @@ const Select = struct { const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x }; const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y }; - fn i(imm: i32) Select.Operand { + fn si(imm: i32) Select.Operand { return .{ .tag = .simm, .imm = imm }; } - fn a(base: Ref.Sized, adjust: Adjust) Select.Operand { + fn sa(base: Ref.Sized, adjust: Adjust) Select.Operand { return .{ .tag = .simm, .base = base, .adjust = adjust }; } - fn ia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { + fn sia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { return .{ .tag = .simm, .base = base, .adjust = adjust, .imm = imm }; } + fn ui(imm: i32) Select.Operand { + return .{ .tag = .uimm, .imm = imm }; + } + fn ua(base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ .tag = .uimm, .base = base, .adjust = adjust }; + } + fn uia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ .tag = .uimm, .base = base, .adjust = adjust, .imm = imm }; + } fn lea(size: Memory.Size, base: Ref) Select.Operand { return .{ @@ -24757,6 +28544,13 @@ const Select = struct { .base = .{ .ref = base, .size = size }, }; } + fn leaa(size: Memory.Size, base: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .adjust = adjust, + }; + } fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand { return .{ .tag = .lea, @@ -24768,14 +28562,22 @@ const Select = struct { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = .@"1" }, + .index = .{ .ref = index, .scale = .@"1" }, + }; + } + fn leaia(size: Memory.Size, base: Ref, index: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = 
size }, + .index = .{ .ref = index, .scale = .@"1" }, + .adjust = adjust, }; } fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = .@"1" }, + .index = .{ .ref = index, .scale = .@"1" }, .imm = disp, }; } @@ -24783,22 +28585,22 @@ const Select = struct { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = scale }, + .index = .{ .ref = index, .scale = scale }, }; } fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = scale }, + .index = .{ .ref = index, .scale = scale }, .imm = disp, }; } - fn leasida(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand { + fn leasiad(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = scale }, + .index = .{ .ref = index, .scale = scale }, .adjust = adjust, .imm = disp, }; @@ -24817,6 +28619,21 @@ const Select = struct { .imm = disp, }; } + fn mema(base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .adjust = adjust, + }; + } + fn memad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .adjust = adjust, + .imm = disp, + }; + } fn memi(base: Ref.Sized, index: Ref) Select.Operand { return .{ .tag = .mem, @@ -24832,6 +28649,15 @@ const Select = struct { .adjust = adjust, }; } + fn memiad(base: Ref.Sized, index: Ref, adjust: Adjust, disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = .@"1" }, + .adjust = adjust, + .imm = disp, + }; + } fn memid(base: Ref.Sized, 
index: Ref, disp: i32) Select.Operand { return .{ .tag = .mem, @@ -24847,6 +28673,14 @@ const Select = struct { .index = .{ .ref = index, .scale = scale }, }; } + fn memsia(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = scale }, + .adjust = adjust, + }; + } fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand { return .{ .tag = .mem, @@ -24855,7 +28689,7 @@ const Select = struct { .imm = disp, }; } - fn memsida(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand { + fn memsiad(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand { return .{ .tag = .mem, .base = base, @@ -24865,26 +28699,34 @@ const Select = struct { }; } - fn adjustedImm(op: Select.Operand, s: *const Select) i32 { - return switch (op.adjust) { - .none => op.imm, - .add_ptr_size => op.imm + @divExact(s.cg.target.ptrBitWidth(), 8), - .sub_ptr_size => op.imm - @divExact(s.cg.target.ptrBitWidth(), 8), - .add_ptr_bit_size => op.imm + s.cg.target.ptrBitWidth(), - .sub_ptr_bit_size => op.imm - s.cg.target.ptrBitWidth(), - .add_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), - .sub_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), - .add_len => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), - .sub_len => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), - .add_elem_limbs => op.imm + @as(i32, @intCast(@divExact( + fn adjustedImm(op: Select.Operand, comptime SignedImm: type, s: *const Select) SignedImm { + const UnsignedImm = @Type(.{ + .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits }, + }); + return op.imm + @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch 
(op.adjust.amount) { + .none => 0, + .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), + .ptr_bit_size => s.cg.target.ptrBitWidth(), + .size => @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)), + .src0_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)), + .bit_size => @intCast(op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), + .src0_bit_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), + .len => @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu)), + .elem_limbs => @intCast(@divExact( op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), @divExact(op.base.size.bitSize(s.cg.target), 8), - ))), - .sub_elem_limbs => op.imm - @as(i32, @intCast(@divExact( - op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), - @divExact(op.base.size.bitSize(s.cg.target), 8), - ))), - }; + )), + .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), + .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate( + -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), + ), + .smax => @as(SignedImm, std.math.maxInt(SignedImm)) >> @truncate( + -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), + ), + .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate( + -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), + )), + }); } fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand { @@ -24907,7 +28749,8 @@ const Select = struct { else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) }, .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) }, }, - .simm => .{ .imm = .s(op.adjustedImm(s)) }, + .simm => .{ .imm = .s(op.adjustedImm(i32, s)) 
}, + .uimm => .{ .imm = .u(@bitCast(op.adjustedImm(i64, s))) }, .lea => .{ .mem = .{ .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) }, .mod = .{ .rm = .{ @@ -24917,7 +28760,7 @@ const Select = struct { .none => .none, }, .scale = op.index.scale, - .disp = op.adjustedImm(s), + .disp = op.adjustedImm(i32, s), } }, } }, .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{ @@ -24927,7 +28770,7 @@ const Select = struct { .none => .none, }, .scale = op.index.scale, - .disp = op.adjustedImm(s), + .disp = op.adjustedImm(i32, s), }) }, }; } @@ -24942,14 +28785,23 @@ fn select( ) !void { cases: for (cases) |case| { for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) { - .@"64bit" => cg.target.ptrBitWidth() == 64, + .@"64bit" => switch (cg.target.cpu.arch) { + else => unreachable, + .x86 => false, + .x86_64 => true, + }, .mmx => false, else => cg.hasFeature(feature), }) continue :cases; for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases; for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases; + if (std.debug.runtime_safety) { + for (case.dst_constraints[dst_temps.len..]) |dst_constraint| assert(dst_constraint == .any); + for (case.src_constraints[src_temps.len..]) |src_constraint| assert(src_constraint == .any); + } patterns: for (case.patterns) |pattern| { - for (pattern.src, src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; + for (pattern.src[0..src_temps.len], src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; + if (std.debug.runtime_safety) for (pattern.src[src_temps.len..]) |src_pattern| assert(src_pattern == .none); var s: Select = .{ .cg = cg, @@ 
-24960,9 +28812,11 @@ fn select( const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)]; const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)]; + @memcpy(src_slots[0..src_temps.len], src_temps); + std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]); for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue; - while (true) for (pattern.src, src_temps) |src_pattern, *src_temp| { + while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| { if (try src_pattern.convert(src_temp, cg)) break; } else break; @memcpy(src_slots[0..src_temps.len], src_temps); diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 251cf7d7cd..142fe4745b 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -64,7 +64,7 @@ pub fn findByMnemonic( comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' '); comptime var features: []const std.Target.x86.Feature = &.{}; inline while (comptime feature_it.next()) |feature| features = features ++ .{@field(std.Target.x86.Feature, feature)}; - break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..features.len].*); + break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..].*); }, }) continue; @@ -250,7 +250,8 @@ pub const Mnemonic = enum { // General-purpose adc, add, @"and", bsf, bsr, bswap, bt, btc, btr, bts, - call, cbw, cdq, cdqe, clflush, + call, cbw, cdq, cdqe, + clac, clc, cld, clflush, cli, clts, clui, cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna, cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno, cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz, @@ -274,7 +275,9 @@ pub const Mnemonic = enum { rcl, rcr, ret, rol, ror, rorx, sal, sar, sarx, sbb, scas, 
scasb, scasd, scasq, scasw, - shl, shld, shlx, shr, shrd, shrx, sub, syscall, + shl, shld, shlx, shr, shrd, shrx, + stac, stc, std, sti, stui, + sub, syscall, seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae, setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns, setnz, seto, setp, setpe, setpo, sets, setz, @@ -307,7 +310,7 @@ pub const Mnemonic = enum { ldmxcsr, maxps, maxss, minps, minss, - movaps, movhlps, movlhps, + movaps, movhlps, movhps, movlhps, movlps, movmskps, movss, movups, mulps, mulss, @@ -333,6 +336,7 @@ pub const Mnemonic = enum { minpd, minsd, movapd, movdqa, movdqu, + movhpd, movlpd, movmskpd, //movsd, movupd, @@ -395,7 +399,7 @@ pub const Mnemonic = enum { vmovd, vmovddup, vmovdqa, vmovdqu, - vmovhlps, vmovlhps, + vmovhlps, vmovhpd, vmovhps, vmovlhps, vmovlpd, vmovlps, vmovmskpd, vmovmskps, vmovq, vmovsd, @@ -823,6 +827,7 @@ pub const Feature = enum { avx2, bmi, bmi2, + cmov, f16c, fma, lzcnt, @@ -830,6 +835,7 @@ pub const Feature = enum { pclmul, @"pclmul avx", popcnt, + smap, sse, sse2, sse3, @@ -837,6 +843,7 @@ pub const Feature = enum { sse4_2, ssse3, sha, + uintr, vaes, vpclmulqdq, x87, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index e025f4ddbd..bfe699a825 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -418,8 +418,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) // Here, we currently assume local dynamic TLS vars, and so // we emit LD model. 
_ = lower.reloc(.{ .linker_tlsld = sym_index }, 0); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .lea, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{ .{ .reg = .rdi }, .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }, }, lower.target); @@ -427,8 +426,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) _ = lower.reloc(.{ .linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null), }, 0); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .call, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ .{ .imm = .s(0) }, }, lower.target); lower.result_insts_len += 1; @@ -440,8 +438,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }) }; } else { // Since we are linking statically, we emit LE model directly. - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = .rax }, .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) }, }, lower.target); @@ -464,8 +461,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .mov => { if (elf_sym.flags.is_extern_ptr) { const reg = ops[0].reg; - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, }, lower.target); @@ -496,16 +492,14 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) if (macho_sym.flags.tlv) { _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = .rdi }, .{ .mem = 
Memory.initRip(mem_op.sib.ptr_size, 0) }, - }); + }, lower.target); lower.result_insts_len += 1; - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .call, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rdi } }) }, - }); + }, lower.target); lower.result_insts_len += 1; emit_mnemonic = .mov; break :op .{ .reg = .rax }; @@ -520,11 +514,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .mov => { if (macho_sym.flags.is_extern_ptr) { const reg = ops[0].reg; - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, - }); + }, lower.target); lower.result_insts_len += 1; break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{ .reg = reg.to64(), @@ -541,8 +534,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }, }; } - lower.result_insts[lower.result_insts_len] = - try Instruction.new(emit_prefix, emit_mnemonic, emit_ops, lower.target); + lower.result_insts[lower.result_insts_len] = try .new(emit_prefix, emit_mnemonic, emit_ops, lower.target); lower.result_insts_len += 1; } diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 19bacdcac6..c5f29d3a0c 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -38,6 +38,11 @@ pub const Inst = struct { /// ___ Right Without Affecting Flags _rx, + /// ___ Forward + _f, + /// ___ Reverse + //_r, + /// ___ Above _a, /// ___ Above Or Equal @@ -47,6 +52,7 @@ pub const Inst = struct { /// ___ Below Or Equal _be, /// ___ Carry + /// ___ Carry Flag _c, /// ___ Equal _e, @@ -98,6 +104,14 @@ pub const Inst = struct { _s, /// ___ Zero _z, + /// ___ Alignment Check Flag + _ac, + /// ___ Direction Flag + //_d, + /// ___ Interrupt Flag + _i, + 
/// ___ User Interrupt Flag + _ui, /// ___ Byte //_b, @@ -299,9 +313,8 @@ pub const Inst = struct { /// Bitwise logical and of packed double-precision floating-point values @"and", /// Bit scan forward - bsf, /// Bit scan reverse - bsr, + bs, /// Byte swap bswap, /// Bit test @@ -317,6 +330,10 @@ pub const Inst = struct { cdq, /// Convert doubleword to quadword cdqe, + /// Clear carry flag + /// Clear direction flag + /// Clear interrupt flag + cl, /// Flush cache line clflush, /// Conditional move @@ -443,6 +460,11 @@ pub const Inst = struct { /// Subtract packed double-precision floating-point values /// Subtract scalar double-precision floating-point values sub, + /// Set carry flag + /// Set direction flag + /// Set interrupt flag + /// Store floating-point value + st, /// Store string sto, /// Syscall @@ -478,8 +500,6 @@ pub const Inst = struct { ldenv, /// Store x87 FPU environment nstenv, - /// Store floating-point value - st, /// Store x87 FPU environment stenv, @@ -560,8 +580,14 @@ pub const Inst = struct { /// Move aligned packed single-precision floating-point values /// Move aligned packed double-precision floating-point values mova, + /// Move high packed single-precision floating-point values + /// Move high packed double-precision floating-point values + movh, /// Move packed single-precision floating-point values high to low movhl, + /// Move low packed single-precision floating-point values + /// Move low packed double-precision floating-point values + movl, /// Move packed single-precision floating-point values low to high movlh, /// Move unaligned packed single-precision floating-point values diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 6d1ab76c5a..500dc488e6 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -571,11 +571,15 @@ pub const Memory = struct { writer: anytype, ) @TypeOf(writer).Error!void { if (s == .none) return; - if (s != .ptr) { - try writer.writeAll(@tagName(s)); - try 
writer.writeByte(' '); + try writer.writeAll(@tagName(s)); + switch (s) { + .none => unreachable, + .ptr => {}, + else => { + try writer.writeByte(' '); + try writer.writeAll("ptr"); + }, } - try writer.writeAll("ptr"); } }; diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index a3a82cf4e2..f6f86cd828 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -132,98 +132,110 @@ pub const table = [_]Entry{ .{ .cdq, .zo, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, .{ .cqo, .zo, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + .{ .clac, .zo, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap }, + + .{ .clc, .zo, &.{}, &.{ 0xf8 }, 0, .none, .none }, + + .{ .cld, .zo, &.{}, &.{ 0xfc }, 0, .none, .none }, + .{ .clflush, .m, &.{ .m8 }, &.{ 0x0f, 0xae }, 7, .none, .none }, - .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, - .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, - .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, - .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, - .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, - .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, - .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, - .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, 
.short, .none }, - .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, - .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, - .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, - .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, - .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, - .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, - .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, - .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, - .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, - .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, - .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, - .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, - .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, - .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, - .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, - .{ .cmovnc, .rm, &.{ 
.r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, - .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, - .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, - .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, - .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, - .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, - .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, - .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, - .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, - .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, - .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, - .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .none }, - .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, - .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, - .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, - .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, - .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .none }, - .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, - .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 
0, .long, .none }, - .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, - .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, - .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .none }, - .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, - .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, - .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, - .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, - .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, - .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, - .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, - .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, - .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .none }, - .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, - .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, - .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, - .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cli, .zo, &.{}, &.{ 0xfa }, 0, .none, .none }, + + .{ .clts, .zo, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none }, + + .{ .clui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr }, + + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov }, + .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov }, + .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, 
.cmov }, + .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov }, + .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov }, + .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov }, + .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov }, + .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov }, + .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov }, + .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .cmov }, + .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .cmov }, + .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .cmov }, + .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov }, + .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov }, + .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov }, + .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .cmov }, + .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .cmov }, + .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .cmov }, + .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .cmov }, + .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .cmov }, + .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .cmov }, + .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .cmov }, + .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .cmov }, + .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .cmov }, + .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .cmov }, + .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .cmov }, + .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .cmov }, + .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .cmov }, + .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .cmov }, + .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 
0x0f, 0x4e }, 0, .long, .cmov }, + .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .cmov }, + .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .cmov }, + .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .cmov }, + .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov }, + .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov }, + .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov }, + .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov }, + .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov }, + .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov }, + .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov }, + .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov }, + .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .cmov }, + .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov }, + .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov }, + .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov }, + .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .cmov }, + .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .cmov }, + .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .cmov }, + .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .cmov }, + .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .cmov }, + .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .cmov }, + .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .cmov }, + .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .cmov }, + .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .cmov }, + .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .cmov }, + .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .cmov 
}, + .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .cmov }, + .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .cmov }, + .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .cmov }, + .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .cmov }, + .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .cmov }, + .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .cmov }, + .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .cmov }, + .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .cmov }, + .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .cmov }, + .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .cmov }, + .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .cmov }, + .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .cmov }, + .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .cmov }, + .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .cmov }, + .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .cmov }, + .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .cmov }, + .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .cmov }, + .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .cmov }, + .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .cmov }, + .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .cmov }, + .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .cmov }, + .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .cmov }, + .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .cmov }, + .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .cmov }, + .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .cmov }, + .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .cmov }, + .{ .cmovpo, .rm, &.{ .r32, .rm32 }, 
&.{ 0x0f, 0x4b }, 0, .none, .cmov }, + .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .cmov }, + .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .cmov }, + .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .cmov }, + .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .cmov }, + .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .cmov }, + .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .cmov }, + .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .cmov }, .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .short, .none }, @@ -747,6 +759,16 @@ pub const table = [_]Entry{ .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, + .{ .stac, .zo, &.{}, &.{ 0x0f, 0x01, 0xcb }, 0, .none, .smap }, + + .{ .stc, .zo, &.{}, &.{ 0xf9 }, 0, .none, .none }, + + .{ .std, .zo, &.{}, &.{ 0xfd }, 0, .none, .none }, + + .{ .sti, .zo, &.{}, &.{ 0xfb }, 0, .none, .none }, + + .{ .stui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr }, + .{ .stos, .zo, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, .{ .stos, .zo, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, .{ .stos, .zo, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, @@ -927,8 +949,14 @@ pub const table = [_]Entry{ .{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + .{ .movhps, .rm, &.{ .xmm, .m64 }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + .{ .movhps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x17 }, 0, .none, .sse }, + .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + .{ .movlps, .rm, &.{ .xmm, .m64 }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + .{ .movlps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x13 }, 0, .none, .sse }, + .{ .movmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse }, .{ .movmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, 
.sse }, @@ -1037,6 +1065,12 @@ pub const table = [_]Entry{ .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 }, .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 }, + .{ .movhpd, .rm, &.{ .xmm, .m64 }, &.{ 0x66, 0x0f, 0x16 }, 0, .none, .sse2 }, + .{ .movhpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x17 }, 0, .none, .sse2 }, + + .{ .movlpd, .rm, &.{ .xmm, .m64 }, &.{ 0x66, 0x0f, 0x12 }, 0, .none, .sse2 }, + .{ .movlpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x13 }, 0, .none, .sse2 }, + .{ .movmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 }, .{ .movmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 }, @@ -1486,8 +1520,20 @@ pub const table = [_]Entry{ .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovhpd, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x66, 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovhpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x17 }, 0, .vex_128_wig, .avx }, + + .{ .vmovhps, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovhps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x17 }, 0, .vex_128_wig, .avx }, + .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovlpd, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x66, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovlpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x13 }, 0, .vex_128_wig, .avx }, + + .{ .vmovlps, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovlps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x13 }, 0, .vex_128_wig, .avx }, + .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx }, .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx }, @@ -1583,14 +1629,14 @@ pub const table = [_]Entry{ .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, .{ .vpextrq, .mri, &.{ .rm64, 
.xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, - .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, - .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_w0, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_w0, .avx }, - .{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, - .{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, - .{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, + .{ .vpinsrb, .rvmi, &.{ .xmm, .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrd, .rvmi, &.{ .xmm, .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrq, .rvmi, &.{ .xmm, .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, - .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_w0, .avx }, .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx }, .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx }, diff --git a/src/dev.zig b/src/dev.zig index 2573e63f25..f4be5a36a9 100644 --- a/src/dev.zig +++ b/src/dev.zig @@ -135,6 +135,7 @@ pub const Env = enum { else => Env.ast_gen.supports(feature), }, .@"x86_64-linux" => switch (feature) { + .build_command, .stdio_listen, .incremental, .x86_64_backend, diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index 68cb154d3b..10e6f669d3 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -1274,19 +1274,19 @@ const x86_64 = 
struct { fn relaxGotpcrelx(code: []u8, t: *const std.Target) !void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFailure; - const inst = switch (old_inst.encoding.mnemonic) { - .call => try Instruction.new(old_inst.prefix, .call, &.{ + const inst: Instruction = switch (old_inst.encoding.mnemonic) { + .call => try .new(old_inst.prefix, .call, &.{ // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, t), - .jmp => try Instruction.new(old_inst.prefix, .jmp, &.{ + .jmp => try .new(old_inst.prefix, .jmp, &.{ // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, t), else => return error.RelaxFailure, }; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); - const nop = try Instruction.new(.none, .nop, &.{}, t); + const nop: Instruction = try .new(.none, .nop, &.{}, t); try encode(&.{ nop, inst }, code); } @@ -1295,7 +1295,7 @@ const x86_64 = struct { const old_inst = disassemble(code) orelse return error.RelaxFailure; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t); + const inst: Instruction = try .new(old_inst.prefix, .lea, &old_inst.ops, t); relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); try encode(&.{inst}, code); }, @@ -1404,14 +1404,15 @@ const x86_64 = struct { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return false; switch (old_inst.encoding.mnemonic) { - .mov => if (Instruction.new(old_inst.prefix, .mov, &.{ - old_inst.ops[0], - // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, - }, t)) |inst| { + .mov => { + const inst = Instruction.new(old_inst.prefix, .mov, &.{ + old_inst.ops[0], + // TODO: hack to force imm32s in the assembler + .{ .imm = .s(-129) }, + }, t) catch return false; inst.encode(std.io.null_writer, .{}) catch 
return false; return true; - } else |_| return false, + }, else => return false, } } @@ -1424,7 +1425,7 @@ const x86_64 = struct { const inst = Instruction.new(old_inst.prefix, .mov, &.{ old_inst.ops[0], // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, t) catch unreachable; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch unreachable; @@ -1438,10 +1439,10 @@ const x86_64 = struct { const old_inst = disassemble(code) orelse return error.RelaxFailure; switch (old_inst.encoding.mnemonic) { .lea => { - const inst = try Instruction.new(old_inst.prefix, .mov, &.{ + const inst: Instruction = try .new(old_inst.prefix, .mov, &.{ old_inst.ops[0], // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, target); relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); try encode(&.{inst}, code); @@ -1781,7 +1782,7 @@ const aarch64 = struct { const off: u12 = @truncate(@as(u64, @bitCast(S_ + A))); aarch64_util.writeAddImmInst(off, code); } else { - const old_inst = Instruction{ + const old_inst: Instruction = .{ .add_subtract_immediate = mem.bytesToValue(std.meta.TagPayload( Instruction, Instruction.add_subtract_immediate, @@ -1795,7 +1796,7 @@ const aarch64 = struct { }, .TLSDESC_CALL => if (!target.flags.has_tlsdesc) { - const old_inst = Instruction{ + const old_inst: Instruction = .{ .unconditional_branch_register = mem.bytesToValue(std.meta.TagPayload( Instruction, Instruction.unconditional_branch_register, diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ed554ffb35..4270ff0306 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -640,7 +640,8 @@ fn resolveRelocInner( macho_file: *MachO, writer: anytype, ) ResolveError!void { - const cpu_arch = macho_file.getTarget().cpu.arch; + const t = &macho_file.base.comp.root_mod.resolved_target.result; + const 
cpu_arch = t.cpu.arch; const rel_offset = math.cast(usize, rel.offset - self.off) orelse return error.Overflow; const P = @as(i64, @intCast(self.getAddress(macho_file))) + @as(i64, @intCast(rel_offset)); const A = rel.addend + rel.getRelocAddend(cpu_arch); @@ -747,7 +748,7 @@ fn resolveRelocInner( const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); try writer.writeInt(i32, @intCast(S_ + A - P), .little); } else { - try x86_64.relaxTlv(code[rel_offset - 3 ..]); + try x86_64.relaxTlv(code[rel_offset - 3 ..], t); try writer.writeInt(i32, @intCast(S + A - P), .little); } }, @@ -893,11 +894,12 @@ fn resolveRelocInner( const x86_64 = struct { fn relaxGotLoad(self: Atom, code: []u8, rel: Relocation, macho_file: *MachO) ResolveError!void { dev.check(.x86_64_backend); + const t = &macho_file.base.comp.root_mod.resolved_target.result; const diags = &macho_file.base.comp.link_diags; const old_inst = disassemble(code) orelse return error.RelaxFail; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t) catch return error.RelaxFail; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch return error.RelaxFail; }, @@ -916,12 +918,12 @@ const x86_64 = struct { } } - fn relaxTlv(code: []u8) error{RelaxFail}!void { + fn relaxTlv(code: []u8, t: *const std.Target) error{RelaxFail}!void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFail; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t) catch return error.RelaxFail; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch return error.RelaxFail; }, diff 
--git a/test/behavior/math.zig b/test/behavior/math.zig index 21f09a877f..789eeaef66 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -65,6 +65,8 @@ test "@clz" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try testClz(); try comptime testClz(); @@ -75,6 +77,7 @@ fn testClz() !void { try expect(testOneClz(u8, 0b00001010) == 4); try expect(testOneClz(u8, 0b00011010) == 3); try expect(testOneClz(u8, 0b00000000) == 8); + try expect(testOneClz(i8, -1) == 0); } test "@clz big ints" { @@ -100,7 +103,7 @@ fn testOneClz(comptime T: type, x: T) u32 { test "@clz vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -159,6 +162,8 @@ fn testCtz() !void { try expect(testOneCtz(u8, 0b10100000) == 5); try expect(testOneCtz(u8, 0b10001010) == 1); try expect(testOneCtz(u8, 0b00000000) == 8); + try expect(testOneCtz(i8, -1) == 0); + try expect(testOneCtz(i8, -2) == 1); try expect(testOneCtz(u16, 0b00000000) == 16); } @@ -1712,7 +1717,7 @@ test "mod lazy values" { test "@clz works on both vector and scalar inputs" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if 
(builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 3180d1554a..6b03ac90e3 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -646,7 +646,7 @@ test "vector division operators" { test "vector bitwise not operator" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/x86_64.zig b/test/behavior/x86_64.zig index ffb1750ff6..f72fa79ca5 100644 --- a/test/behavior/x86_64.zig +++ b/test/behavior/x86_64.zig @@ -1,8 +1,7 @@ //! CodeGen tests for the x86_64 backend. 
-const builtin = @import("builtin"); - test { + const builtin = @import("builtin"); if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest; if (builtin.object_format == .coff) return error.SkipZigTest; _ = @import("x86_64/math.zig"); diff --git a/test/behavior/x86_64/build.zig b/test/behavior/x86_64/build.zig new file mode 100644 index 0000000000..dccda7236b --- /dev/null +++ b/test/behavior/x86_64/build.zig @@ -0,0 +1,114 @@ +const std = @import("std"); +pub fn build(b: *std.Build) void { + const compiler_rt_lib = b.addStaticLibrary(.{ + .name = "compiler_rt", + .use_llvm = false, + .use_lld = false, + .root_module = b.createModule(.{ + .root_source_file = b.addWriteFiles().add("compiler_rt.zig", ""), + .target = b.resolveTargetQuery(.{ .cpu_arch = .x86_64 }), + }), + }); + compiler_rt_lib.bundle_compiler_rt = true; + + for ([_]std.Target.Query{ + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.bsf_bsr_0_clobbers_result}), + //.cpu_features_sub = std.Target.x86.featureSet(&.{.sse}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.bsf_bsr_0_clobbers_result}), + .cpu_features_sub = std.Target.x86.featureSet(&.{ + .cmov, + //.sse, + }), + }, + //.{ + // .cpu_arch = .x86_64, + // .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + // .cpu_features_sub = std.Target.x86.featureSet(&.{.sse}), + //}, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_sub = std.Target.x86.featureSet(&.{.sse2}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.sse3}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = 
&std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.ssse3}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.sse4_1}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.sse4_2}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 }, + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.avx}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, + .cpu_features_sub = std.Target.x86.featureSet(&.{.avx2}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 }, + }, + }) |query| { + const target = b.resolveTargetQuery(query); + const cpu = query.serializeCpuAlloc(b.allocator) catch @panic("OOM"); + for ([_][]const u8{ + "math.zig", + }) |path| { + const test_mod = b.createModule(.{ + .root_source_file = b.path(path), + .target = target, + }); + const test_exe = b.addTest(.{ + .name = std.fs.path.stem(path), + .use_llvm = false, + .use_lld = false, + .root_module = test_mod, + }); + if (!std.Target.x86.featureSetHas(target.result.cpu.features, .sse2)) { + test_exe.bundle_compiler_rt = false; + test_mod.linkLibrary(compiler_rt_lib); + } + const test_run = b.addRunArtifact(test_exe); + b.default_step.dependOn(&test_run.step); + for ([_]*std.Build.Step{ + &test_exe.step, + &test_run.step, + }) |step| step.name = b.fmt("{s} {s}", .{ step.name, cpu }); + } + } +} diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 7860c945a0..5bb257a5f4 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ 
-1,3 +1,709 @@ +fn testUnary(comptime op: anytype) !void { + const testType = struct { + fn testType(comptime Type: type, comptime imm_arg: Type) !void { + const expected = op(Type, imm_arg); + try struct { + fn testOne(actual: @TypeOf(expected)) !void { + if (switch (@typeInfo(@TypeOf(expected))) { + else => actual != expected, + .vector => @reduce(.Or, actual != expected), + }) return error.Unexpected; + } + noinline fn testOps(mem_arg: Type) !void { + var reg_arg = mem_arg; + _ = .{&reg_arg}; + try testOne(op(Type, reg_arg)); + try testOne(op(Type, mem_arg)); + try testOne(op(Type, imm_arg)); + } + }.testOps(imm_arg); + } + }.testType; + + try testType(i0, 0); + try testType(u0, 0); + + try testType(i1, -1); + try testType(i1, 0); + try testType(u1, 0); + try testType(u1, 1 << 0); + + try testType(i2, -1 << 1); + try testType(i2, -1); + try testType(i2, 0); + try testType(u2, 0); + try testType(u2, 1 << 0); + try testType(u2, 1 << 1); + + try testType(i3, -1 << 2); + try testType(i3, -1); + try testType(i3, 0); + try testType(u3, 0); + try testType(u3, 1 << 0); + try testType(u3, 1 << 1); + try testType(u3, 1 << 2); + + try testType(i4, -1 << 3); + try testType(i4, -1); + try testType(i4, 0); + try testType(u4, 0); + try testType(u4, 1 << 0); + try testType(u4, 1 << 1); + try testType(u4, 1 << 2); + try testType(u4, 1 << 3); + + try testType(i5, -1 << 4); + try testType(i5, -1); + try testType(i5, 0); + try testType(u5, 0); + try testType(u5, 1 << 0); + try testType(u5, 1 << 1); + try testType(u5, 1 << 3); + try testType(u5, 1 << 4); + + try testType(i7, -1 << 6); + try testType(i7, -1); + try testType(i7, 0); + try testType(u7, 0); + try testType(u7, 1 << 0); + try testType(u7, 1 << 1); + try testType(u7, 1 << 5); + try testType(u7, 1 << 6); + + try testType(i8, -1 << 7); + try testType(i8, -1); + try testType(i8, 0); + try testType(u8, 0); + try testType(u8, 1 << 0); + try testType(u8, 1 << 1); + try testType(u8, 1 << 6); + try testType(u8, 1 << 7); + + try
testType(i9, -1 << 8); + try testType(i9, -1); + try testType(i9, 0); + try testType(u9, 0); + try testType(u9, 1 << 0); + try testType(u9, 1 << 1); + try testType(u9, 1 << 7); + try testType(u9, 1 << 8); + + try testType(i15, -1 << 14); + try testType(i15, -1); + try testType(i15, 0); + try testType(u15, 0); + try testType(u15, 1 << 0); + try testType(u15, 1 << 1); + try testType(u15, 1 << 13); + try testType(u15, 1 << 14); + + try testType(i16, -1 << 15); + try testType(i16, -1); + try testType(i16, 0); + try testType(u16, 0); + try testType(u16, 1 << 0); + try testType(u16, 1 << 1); + try testType(u16, 1 << 14); + try testType(u16, 1 << 15); + + try testType(i17, -1 << 16); + try testType(i17, -1); + try testType(i17, 0); + try testType(u17, 0); + try testType(u17, 1 << 0); + try testType(u17, 1 << 1); + try testType(u17, 1 << 15); + try testType(u17, 1 << 16); + + try testType(i31, -1 << 30); + try testType(i31, -1); + try testType(i31, 0); + try testType(u31, 0); + try testType(u31, 1 << 0); + try testType(u31, 1 << 1); + try testType(u31, 1 << 29); + try testType(u31, 1 << 30); + + try testType(i32, -1 << 31); + try testType(i32, -1); + try testType(i32, 0); + try testType(u32, 0); + try testType(u32, 1 << 0); + try testType(u32, 1 << 1); + try testType(u32, 1 << 30); + try testType(u32, 1 << 31); + + try testType(i33, -1 << 32); + try testType(i33, -1); + try testType(i33, 0); + try testType(u33, 0); + try testType(u33, 1 << 0); + try testType(u33, 1 << 1); + try testType(u33, 1 << 31); + try testType(u33, 1 << 32); + + try testType(i63, -1 << 62); + try testType(i63, -1); + try testType(i63, 0); + try testType(u63, 0); + try testType(u63, 1 << 0); + try testType(u63, 1 << 1); + try testType(u63, 1 << 61); + try testType(u63, 1 << 62); + + try testType(i64, -1 << 63); + try testType(i64, -1); + try testType(i64, 0); + try testType(u64, 0); + try testType(u64, 1 << 0); + try testType(u64, 1 << 1); + try testType(u64, 1 << 62); + try testType(u64, 1 << 63); + 
+ try testType(i65, -1 << 64); + try testType(i65, -1); + try testType(i65, 0); + try testType(u65, 0); + try testType(u65, 1 << 0); + try testType(u65, 1 << 1); + try testType(u65, 1 << 63); + try testType(u65, 1 << 64); + + try testType(i95, -1 << 94); + try testType(i95, -1); + try testType(i95, 0); + try testType(u95, 0); + try testType(u95, 1 << 0); + try testType(u95, 1 << 1); + try testType(u95, 1 << 93); + try testType(u95, 1 << 94); + + try testType(i96, -1 << 95); + try testType(i96, -1); + try testType(i96, 0); + try testType(u96, 0); + try testType(u96, 1 << 0); + try testType(u96, 1 << 1); + try testType(u96, 1 << 94); + try testType(u96, 1 << 95); + + try testType(i97, -1 << 96); + try testType(i97, -1); + try testType(i97, 0); + try testType(u97, 0); + try testType(u97, 1 << 0); + try testType(u97, 1 << 1); + try testType(u97, 1 << 95); + try testType(u97, 1 << 96); + + try testType(i127, -1 << 126); + try testType(i127, -1); + try testType(i127, 0); + try testType(u127, 0); + try testType(u127, 1 << 0); + try testType(u127, 1 << 1); + try testType(u127, 1 << 125); + try testType(u127, 1 << 126); + + try testType(i128, -1 << 127); + try testType(i128, -1); + try testType(i128, 0); + try testType(u128, 0); + try testType(u128, 1 << 0); + try testType(u128, 1 << 1); + try testType(u128, 1 << 126); + try testType(u128, 1 << 127); + + try testType(i129, -1 << 128); + try testType(i129, -1); + try testType(i129, 0); + try testType(u129, 0); + try testType(u129, 1 << 0); + try testType(u129, 1 << 1); + try testType(u129, 1 << 127); + try testType(u129, 1 << 128); + + try testType(i159, -1 << 158); + try testType(i159, -1); + try testType(i159, 0); + try testType(u159, 0); + try testType(u159, 1 << 0); + try testType(u159, 1 << 1); + try testType(u159, 1 << 157); + try testType(u159, 1 << 158); + + try testType(i160, -1 << 159); + try testType(i160, -1); + try testType(i160, 0); + try testType(u160, 0); + try testType(u160, 1 << 0); + try testType(u160, 1 
<< 1); + try testType(u160, 1 << 158); + try testType(u160, 1 << 159); + + try testType(i161, -1 << 160); + try testType(i161, -1); + try testType(i161, 0); + try testType(u161, 0); + try testType(u161, 1 << 0); + try testType(u161, 1 << 1); + try testType(u161, 1 << 159); + try testType(u161, 1 << 160); + + try testType(i191, -1 << 190); + try testType(i191, -1); + try testType(i191, 0); + try testType(u191, 0); + try testType(u191, 1 << 0); + try testType(u191, 1 << 1); + try testType(u191, 1 << 189); + try testType(u191, 1 << 190); + + try testType(i192, -1 << 191); + try testType(i192, -1); + try testType(i192, 0); + try testType(u192, 0); + try testType(u192, 1 << 0); + try testType(u192, 1 << 1); + try testType(u192, 1 << 190); + try testType(u192, 1 << 191); + + try testType(i193, -1 << 192); + try testType(i193, -1); + try testType(i193, 0); + try testType(u193, 0); + try testType(u193, 1 << 0); + try testType(u193, 1 << 1); + try testType(u193, 1 << 191); + try testType(u193, 1 << 192); + + try testType(i223, -1 << 222); + try testType(i223, -1); + try testType(i223, 0); + try testType(u223, 0); + try testType(u223, 1 << 0); + try testType(u223, 1 << 1); + try testType(u223, 1 << 221); + try testType(u223, 1 << 222); + + try testType(i224, -1 << 223); + try testType(i224, -1); + try testType(i224, 0); + try testType(u224, 0); + try testType(u224, 1 << 0); + try testType(u224, 1 << 1); + try testType(u224, 1 << 222); + try testType(u224, 1 << 223); + + try testType(i225, -1 << 224); + try testType(i225, -1); + try testType(i225, 0); + try testType(u225, 0); + try testType(u225, 1 << 0); + try testType(u225, 1 << 1); + try testType(u225, 1 << 223); + try testType(u225, 1 << 224); + + try testType(i255, -1 << 254); + try testType(i255, -1); + try testType(i255, 0); + try testType(u255, 0); + try testType(u255, 1 << 0); + try testType(u255, 1 << 1); + try testType(u255, 1 << 253); + try testType(u255, 1 << 254); + + try testType(i256, -1 << 255); + try 
testType(i256, -1); + try testType(i256, 0); + try testType(u256, 0); + try testType(u256, 1 << 0); + try testType(u256, 1 << 1); + try testType(u256, 1 << 254); + try testType(u256, 1 << 255); + + try testType(i257, -1 << 256); + try testType(i257, -1); + try testType(i257, 0); + try testType(u257, 0); + try testType(u257, 1 << 0); + try testType(u257, 1 << 1); + try testType(u257, 1 << 255); + try testType(u257, 1 << 256); + + try testType(i511, -1 << 510); + try testType(i511, -1); + try testType(i511, 0); + try testType(u511, 0); + try testType(u511, 1 << 0); + try testType(u511, 1 << 1); + try testType(u511, 1 << 509); + try testType(u511, 1 << 510); + + try testType(i512, -1 << 511); + try testType(i512, -1); + try testType(i512, 0); + try testType(u512, 0); + try testType(u512, 1 << 0); + try testType(u512, 1 << 1); + try testType(u512, 1 << 510); + try testType(u512, 1 << 511); + + try testType(i513, -1 << 512); + try testType(i513, -1); + try testType(i513, 0); + try testType(u513, 0); + try testType(u513, 1 << 0); + try testType(u513, 1 << 1); + try testType(u513, 1 << 511); + try testType(u513, 1 << 512); + + try testType(i1023, -1 << 1022); + try testType(i1023, -1); + try testType(i1023, 0); + try testType(u1023, 0); + try testType(u1023, 1 << 0); + try testType(u1023, 1 << 1); + try testType(u1023, 1 << 1021); + try testType(u1023, 1 << 1022); + + try testType(i1024, -1 << 1023); + try testType(i1024, -1); + try testType(i1024, 0); + try testType(u1024, 0); + try testType(u1024, 1 << 0); + try testType(u1024, 1 << 1); + try testType(u1024, 1 << 1022); + try testType(u1024, 1 << 1023); + + try testType(i1025, -1 << 1024); + try testType(i1025, -1); + try testType(i1025, 0); + try testType(u1025, 0); + try testType(u1025, 1 << 0); + try testType(u1025, 1 << 1); + try testType(u1025, 1 << 1023); + try testType(u1025, 1 << 1024); + + try testType(@Vector(3, i0), .{ 0 << 0, 0, 0 }); + try testType(@Vector(3, u0), .{ 0, 0, 0 << 0 }); + + try 
testType(@Vector(3, i1), .{ -1 << 0, -1, 0 }); + try testType(@Vector(3, u1), .{ 0, 1, 1 << 0 }); + + try testType(@Vector(3, i2), .{ -1 << 1, -1, 0 }); + try testType(@Vector(3, u2), .{ 0, 1, 1 << 1 }); + + try testType(@Vector(3, i3), .{ -1 << 2, -1, 0 }); + try testType(@Vector(3, u3), .{ 0, 1, 1 << 2 }); + + try testType(@Vector(3, i4), .{ -1 << 3, -1, 0 }); + try testType(@Vector(3, u4), .{ 0, 1, 1 << 3 }); + try testType(@Vector(1, u4), .{ + 0xb, + }); + try testType(@Vector(2, u4), .{ + 0x3, 0x4, + }); + try testType(@Vector(4, u4), .{ + 0x9, 0x2, 0xf, 0xe, + }); + try testType(@Vector(8, u4), .{ + 0x8, 0x1, 0xb, 0x1, 0xf, 0x5, 0x9, 0x6, + }); + try testType(@Vector(16, u4), .{ + 0xb, 0x6, 0x0, 0x7, 0x8, 0x5, 0x6, 0x9, 0xe, 0xb, 0x3, 0xa, 0xb, 0x5, 0x8, 0xc, + }); + try testType(@Vector(32, u4), .{ + 0xe, 0x6, 0xe, 0xa, 0xb, 0x4, 0xa, 0xb, 0x1, 0x3, 0xb, 0xc, 0x0, 0xb, 0x9, 0x4, 0xd, 0xa, 0xd, 0xd, 0x4, 0x8, 0x8, 0x6, 0xb, 0xe, 0x9, 0x6, 0xc, 0xd, 0x5, 0xd, + }); + try testType(@Vector(64, u4), .{ + 0x1, 0xc, 0xe, 0x9, 0x9, 0xf, 0x3, 0xf, 0x9, 0x9, 0x5, 0x3, 0xb, 0xd, 0xd, 0xf, 0x1, 0x2, 0xf, 0x9, 0x4, 0x4, 0x8, 0x9, 0x2, 0x9, 0x8, 0xe, 0x8, 0xa, 0x4, 0x3, + 0x4, 0xc, 0xb, 0x6, 0x4, 0x0, 0xa, 0x5, 0x1, 0xa, 0x4, 0xe, 0xa, 0x7, 0xd, 0x0, 0x4, 0xe, 0xe, 0x7, 0x7, 0xa, 0x4, 0x5, 0x6, 0xc, 0x6, 0x2, 0x6, 0xa, 0xe, 0xa, + }); + try testType(@Vector(128, u4), .{ + 0xd, 0x5, 0x6, 0xe, 0x3, 0x3, 0x3, 0xe, 0xd, 0xd, 0x9, 0x0, 0x0, 0xe, 0xa, 0x9, 0x8, 0x7, 0xb, 0x5, 0x7, 0xf, 0xb, 0x8, 0x0, 0xf, 0xb, 0x3, 0xa, 0x2, 0xb, 0xc, + 0x1, 0x1, 0xc, 0x8, 0x8, 0x6, 0x9, 0x1, 0xb, 0x0, 0x2, 0xb, 0x2, 0x2, 0x7, 0x6, 0x1, 0x1, 0xb, 0x4, 0x6, 0x4, 0x7, 0xc, 0xd, 0xc, 0xa, 0x8, 0x1, 0x7, 0x8, 0xa, + 0x9, 0xa, 0x1, 0x8, 0x1, 0x7, 0x9, 0x4, 0x5, 0x9, 0xd, 0x0, 0xa, 0xf, 0x3, 0x3, 0x9, 0x2, 0xf, 0x5, 0xb, 0x8, 0x6, 0xb, 0xf, 0x5, 0x8, 0x3, 0x9, 0xf, 0x6, 0x8, + 0xc, 0x8, 0x3, 0x4, 0xa, 0xe, 0xc, 0x1, 0xe, 0x9, 0x1, 0x8, 0xf, 0x6, 0xc, 0xc, 0x6, 0xf, 0x6, 0xd, 0xb, 0x9, 0xc, 0x3, 0xd, 
0xa, 0x6, 0x8, 0x4, 0xa, 0x6, 0x9, + }); + try testType(@Vector(256, u4), .{ + 0x6, 0xc, 0xe, 0x3, 0x8, 0x2, 0xb, 0xd, 0x3, 0xa, 0x3, 0x8, 0xb, 0x8, 0x3, 0x0, 0xb, 0x5, 0x1, 0x3, 0x2, 0x2, 0xf, 0xc, 0x5, 0x1, 0x3, 0xb, 0x1, 0xc, 0x2, 0xd, + 0xa, 0x8, 0x1, 0xc, 0xb, 0xa, 0x3, 0x1, 0xe, 0x4, 0xf, 0xb, 0xd, 0x8, 0xf, 0xa, 0xc, 0xb, 0xb, 0x0, 0xa, 0xc, 0xf, 0xe, 0x8, 0xd, 0x9, 0x3, 0xa, 0xe, 0x8, 0x7, + 0x5, 0xa, 0x0, 0xe, 0x0, 0xd, 0x2, 0x2, 0x9, 0x4, 0x8, 0x9, 0x0, 0x4, 0x4, 0x8, 0xe, 0x1, 0xf, 0x1, 0x9, 0x3, 0xf, 0xc, 0xa, 0x0, 0x3, 0x2, 0x4, 0x1, 0x2, 0x3, + 0xf, 0x2, 0x7, 0xb, 0x5, 0x0, 0xd, 0x3, 0x4, 0xf, 0xa, 0x3, 0xc, 0x2, 0x5, 0xe, 0x7, 0x5, 0xd, 0x7, 0x9, 0x0, 0xd, 0x7, 0x9, 0xd, 0x5, 0x7, 0xf, 0xd, 0xb, 0x4, + 0x9, 0x6, 0xf, 0xb, 0x1, 0xb, 0x6, 0xb, 0xf, 0x7, 0xf, 0x0, 0x4, 0x7, 0x5, 0xa, 0x8, 0x1, 0xf, 0x9, 0x9, 0x0, 0x6, 0xb, 0x1, 0x2, 0x4, 0x3, 0x2, 0x0, 0x7, 0x0, + 0x6, 0x7, 0xf, 0x1, 0xe, 0xa, 0x8, 0x2, 0x9, 0xc, 0x1, 0x5, 0x7, 0x1, 0xb, 0x0, 0x1, 0x3, 0xd, 0x3, 0x0, 0x1, 0xa, 0x0, 0x3, 0x7, 0x1, 0x2, 0xb, 0xc, 0x2, 0x9, + 0x8, 0x8, 0x7, 0x0, 0xd, 0x5, 0x1, 0x5, 0x7, 0x7, 0x2, 0x3, 0x8, 0x7, 0xc, 0x8, 0xf, 0xa, 0xf, 0xf, 0x3, 0x2, 0x0, 0x4, 0x7, 0x5, 0x6, 0xd, 0x6, 0x3, 0xa, 0x4, + 0x1, 0x1, 0x2, 0xc, 0x3, 0xe, 0x2, 0xc, 0x7, 0x6, 0xe, 0xf, 0xb, 0x8, 0x6, 0x6, 0x9, 0x0, 0x4, 0xb, 0xe, 0x4, 0x2, 0x7, 0xf, 0xc, 0x0, 0x6, 0xd, 0xa, 0xe, 0xc, + }); + + try testType(@Vector(3, i5), .{ -1 << 4, -1, 0 }); + try testType(@Vector(3, u5), .{ 0, 1, 1 << 4 }); + + try testType(@Vector(3, i7), .{ -1 << 6, -1, 0 }); + try testType(@Vector(3, u7), .{ 0, 1, 1 << 6 }); + + try testType(@Vector(3, i8), .{ -1 << 7, -1, 0 }); + try testType(@Vector(3, u8), .{ 0, 1, 1 << 7 }); + try testType(@Vector(1, u8), .{ + 0x33, + }); + try testType(@Vector(2, u8), .{ + 0x66, 0x87, + }); + try testType(@Vector(4, u8), .{ + 0x9d, 0xcb, 0x30, 0x7b, + }); + try testType(@Vector(8, u8), .{ + 0x4b, 0x35, 0x3f, 0x5c, 0xa5, 0x91, 0x23, 0x6d, + }); + try testType(@Vector(16, u8), .{ + 0xb7, 
0x57, 0x27, 0x29, 0x58, 0xf8, 0xc9, 0x6c, 0xbe, 0x41, 0xf4, 0xd7, 0x4d, 0x01, 0xf0, 0x37, + }); + try testType(@Vector(32, u8), .{ + 0x5f, 0x61, 0x34, 0xe8, 0x37, 0x12, 0xba, 0x5a, 0x85, 0xf3, 0x3e, 0xa2, 0x0f, 0xd0, 0x65, 0xae, + 0xed, 0xf5, 0xe8, 0x65, 0x61, 0x28, 0x4a, 0x27, 0x2e, 0x01, 0x40, 0x8c, 0xe3, 0x36, 0x5d, 0xb6, + }); + try testType(@Vector(64, u8), .{ + 0xb0, 0x19, 0x5c, 0xc2, 0x3b, 0x16, 0x70, 0xad, 0x26, 0x45, 0xf2, 0xe1, 0x4f, 0x0f, 0x01, 0x72, + 0x7f, 0x1f, 0x07, 0x9e, 0xee, 0x9b, 0xb3, 0x38, 0x50, 0xf3, 0x56, 0x73, 0xd0, 0xd1, 0xee, 0xe3, + 0xeb, 0xf3, 0x1b, 0xe0, 0x77, 0x78, 0x75, 0xc6, 0x19, 0xe4, 0x69, 0xaa, 0x73, 0x08, 0xcd, 0x0c, + 0xf9, 0xed, 0x94, 0xf8, 0x79, 0x86, 0x63, 0x31, 0xbf, 0xd1, 0xe3, 0x17, 0x2b, 0xb9, 0xa1, 0x72, + }); + try testType(@Vector(128, u8), .{ + 0x2e, 0x93, 0x87, 0x09, 0x4f, 0x68, 0x14, 0xab, 0x3f, 0x04, 0x86, 0xc1, 0x95, 0xe8, 0x74, 0x11, + 0x57, 0x25, 0xe1, 0x88, 0xc0, 0x96, 0x33, 0x99, 0x15, 0x86, 0x2c, 0x84, 0x2e, 0xd7, 0x57, 0x21, + 0xd3, 0x18, 0xd5, 0x0e, 0xb4, 0x60, 0xe2, 0x08, 0xce, 0xbc, 0xd5, 0x4d, 0x8f, 0x59, 0x01, 0x67, + 0x71, 0x0a, 0x74, 0x48, 0xef, 0x39, 0x49, 0x7e, 0xa8, 0x39, 0x34, 0x75, 0x95, 0x3b, 0x38, 0xea, + 0x60, 0xd7, 0xed, 0x8f, 0xbb, 0xc0, 0x7d, 0xc2, 0x79, 0x2d, 0xbf, 0xa5, 0x64, 0xf4, 0x09, 0x86, + 0xfb, 0x29, 0xfe, 0xc7, 0xff, 0x62, 0x1a, 0x6f, 0xf8, 0xbd, 0xfe, 0xa4, 0xac, 0x24, 0xcf, 0x56, + 0x82, 0x69, 0x81, 0x0d, 0xc1, 0x51, 0x8d, 0x85, 0xf4, 0x00, 0xe7, 0x25, 0xab, 0xa5, 0x33, 0x45, + 0x66, 0x2e, 0x33, 0xc8, 0xf3, 0x35, 0x16, 0x7d, 0x1f, 0xc9, 0xf7, 0x44, 0xab, 0x66, 0x28, 0x0d, + }); + + try testType(@Vector(3, i9), .{ -1 << 8, -1, 0 }); + try testType(@Vector(3, u9), .{ 0, 1, 1 << 8 }); + + try testType(@Vector(3, i15), .{ -1 << 14, -1, 0 }); + try testType(@Vector(3, u15), .{ 0, 1, 1 << 14 }); + + try testType(@Vector(3, i16), .{ -1 << 15, -1, 0 }); + try testType(@Vector(3, u16), .{ 0, 1, 1 << 15 }); + try testType(@Vector(1, u16), .{ + 0x4da6, + }); + try testType(@Vector(2, 
u16), .{ + 0x04d7, 0x50c6, + }); + try testType(@Vector(4, u16), .{ + 0x4c06, 0xd71f, 0x4d8f, 0xe0a4, + }); + try testType(@Vector(8, u16), .{ + 0xee9a, 0x881d, 0x31fb, 0xd3f7, 0x2c74, 0x6949, 0x4e04, 0x53d7, + }); + try testType(@Vector(16, u16), .{ + 0xeafe, 0x9a7b, 0x0d6f, 0x18cb, 0xaf8f, 0x8ee4, 0xa47e, 0xd39a, + 0x6572, 0x9c53, 0xf36e, 0x982e, 0x41c1, 0x8682, 0xf5dc, 0x7e01, + }); + try testType(@Vector(32, u16), .{ + 0xdfb3, 0x7de6, 0xd9ed, 0xb42e, 0x95ac, 0x9b5b, 0x0422, 0xdfcd, + 0x6196, 0x4dbe, 0x1818, 0x8816, 0x75e7, 0xc9b0, 0x92f7, 0x1f71, + 0xe584, 0x576c, 0x043a, 0x0f31, 0xfc4c, 0x2c87, 0x6b02, 0x0229, + 0x25b7, 0x53cd, 0x9bab, 0x866b, 0x9008, 0xf0f3, 0xeb21, 0x88e2, + }); + try testType(@Vector(64, u16), .{ + 0x084c, 0x445f, 0xce89, 0xd3ee, 0xb399, 0x315d, 0x8ef8, 0x4f6f, + 0xf9af, 0xcbc4, 0x0332, 0xcd55, 0xa4dc, 0xbc38, 0x6e33, 0x8ead, + 0xd15a, 0x5057, 0x58ef, 0x657a, 0xe9f0, 0x1418, 0x2b62, 0x3387, + 0x1c15, 0x04e1, 0x0276, 0x3783, 0xad9c, 0xea9a, 0x0e5e, 0xe803, + 0x2ee7, 0x0cf1, 0x30f1, 0xb12a, 0x381b, 0x353d, 0xf637, 0xf853, + 0x2ac1, 0x7ce8, 0x6a50, 0xcbb8, 0xc9b8, 0x9b25, 0xd1e9, 0xeff0, + 0xc0a2, 0x8e51, 0xde7a, 0x4e58, 0x5685, 0xeb3f, 0xd29b, 0x66ed, + 0x3dd5, 0xcb59, 0x6003, 0xf710, 0x943a, 0x7276, 0xe547, 0xe48f, + }); + + try testType(@Vector(3, i17), .{ -1 << 16, -1, 0 }); + try testType(@Vector(3, u17), .{ 0, 1, 1 << 16 }); + + try testType(@Vector(3, i31), .{ -1 << 30, -1, 0 }); + try testType(@Vector(3, u31), .{ 0, 1, 1 << 30 }); + + try testType(@Vector(3, i32), .{ -1 << 31, -1, 0 }); + try testType(@Vector(3, u32), .{ 0, 1, 1 << 31 }); + try testType(@Vector(1, u32), .{ + 0x17e2805c, + }); + try testType(@Vector(2, u32), .{ + 0xdb6aadc5, 0xb1ff3754, + }); + try testType(@Vector(4, u32), .{ + 0xf7897b31, 0x342e1af9, 0x190fd76b, 0x283b5374, + }); + try testType(@Vector(8, u32), .{ + 0x81a0bd16, 0xc55da94e, 0x910f7e7c, 0x078d5ef7, + 0x0bdb1e4a, 0xf1a96e99, 0xcdd729b5, 0xe6966a1c, + }); + try testType(@Vector(16, u32), .{ + 0xfee812db, 
0x29eacbed, 0xaed48136, 0x3053de13, + 0xbbda20df, 0x6faa274a, 0xe0b5ec3a, 0x1878b0dc, + 0x98204475, 0x810d8d05, 0x1e6996b6, 0xc543826a, + 0x53b47d8c, 0xc72c3142, 0x12f7e1f9, 0xf6782e54, + }); + try testType(@Vector(32, u32), .{ + 0xf0cf30d3, 0xe3c587b8, 0xcee44739, 0xe4a0bd72, + 0x41d44cce, 0x6d7c4259, 0xd85580a5, 0xec4b02d7, + 0xa366483d, 0x2d7b59d4, 0xe9c0ace4, 0x82cb441c, + 0xa23958ba, 0x04a70148, 0x3f0d20a3, 0xf9e21e37, + 0x009fce8b, 0x4a34a229, 0xf09c35cf, 0xc0977d4d, + 0xcc4d4647, 0xa30f1363, 0x27a65b14, 0xe572c785, + 0x8f42e320, 0x2b2cdeca, 0x11205bd4, 0x739d26aa, + 0xcbcc2df0, 0x5f7a3649, 0xbde1b7aa, 0x180a169f, + }); + + try testType(@Vector(3, i33), .{ -1 << 32, -1, 0 }); + try testType(@Vector(3, u33), .{ 0, 1, 1 << 32 }); + + try testType(@Vector(3, i63), .{ -1 << 62, -1, 0 }); + try testType(@Vector(3, u63), .{ 0, 1, 1 << 62 }); + + try testType(@Vector(3, i64), .{ -1 << 63, -1, 0 }); + try testType(@Vector(3, u64), .{ 0, 1, 1 << 63 }); + try testType(@Vector(1, u64), .{ + 0x7d2e439abb0edba7, + }); + try testType(@Vector(2, u64), .{ + 0x3749ee5a2d237b9f, 0x6d8f4c3e1378f389, + }); + try testType(@Vector(4, u64), .{ + 0x03c127040e10d52b, 0xa86fe019072e27eb, + 0x0a554a47b709cdba, 0xf4342cc597e196c3, + }); + try testType(@Vector(8, u64), .{ + 0xea455c104375a055, 0x5c35d9d945edb2fa, + 0xc11b73d9d9d546fc, 0x2a9d63aae838dd5b, + 0xed6603f1f5d574b3, 0x2f37b354c81c1e56, + 0xbe7f5e2476bc76bd, 0xb0c88eacfffa9a8f, + }); + try testType(@Vector(16, u64), .{ + 0x2258fc04b31f8dbe, 0x3a2e5483003a10d8, + 0xebf24b31c0460510, 0x15d5b4c09b53ffa5, + 0x05abf6e744b17cc6, 0x9747b483f2d159fe, + 0x4616d8b2c8673125, 0x8ae3f91d422447eb, + 0x18da2f101a9e9776, 0x77a1197fb0441007, + 0x4ba480c8ec2dd10b, 0xeb99b9c0a1725278, + 0xd9d0acc5084ecdf0, 0xa0a23317fff4f515, + 0x0901c59a9a6a408b, 0x7c77ca72e25df033, + }); + + try testType(@Vector(3, i65), .{ -1 << 64, -1, 0 }); + try testType(@Vector(3, u65), .{ 0, 1, 1 << 64 }); + + try testType(@Vector(3, i127), .{ -1 << 126, -1, 0 }); + try 
testType(@Vector(3, u127), .{ 0, 1, 1 << 126 }); + + try testType(@Vector(3, i128), .{ -1 << 127, -1, 0 }); + try testType(@Vector(3, u128), .{ 0, 1, 1 << 127 }); + try testType(@Vector(1, u128), .{ + 0x809f29e7fbafadc01145e1732590e7d9, + }); + try testType(@Vector(2, u128), .{ + 0x5150ac3438aacd0d51132cc2723b2995, + 0x151be9c47ad29cf719cf8358dd40165c, + }); + try testType(@Vector(4, u128), .{ + 0x4bae22df929f2f7cb9bd84deaad3e7a8, + 0x1ed46b2d6e1f3569f56b2ac33d8bc1cb, + 0xae93ea459d2ccfd5fb794e6d5c31aabb, + 0xb1177136acf099f550b70949ac202ec4, + }); + try testType(@Vector(8, u128), .{ + 0x7cd78db6baed6bfdf8c5265136c4e0fd, + 0xa41b8984c6bbde84640068194b7eba98, + 0xd33102778f2ae1a48d1e9bf8801bbbf0, + 0x0d59f6de003513a60055c86cbce2c200, + 0x825579d90012afddfbf04851c0748561, + 0xc2647c885e9d6f0ee1f5fac5da8ef7f5, + 0xcb4bbc1f81aa8ee68aa4dc140745687b, + 0x4ff10f914f74b46c694407f5bf7c7836, + }); + + try testType(@Vector(3, i129), .{ -1 << 128, -1, 0 }); + try testType(@Vector(3, u129), .{ 0, 1, 1 << 128 }); + + try testType(@Vector(3, i191), .{ -1 << 190, -1, 0 }); + try testType(@Vector(3, u191), .{ 0, 1, 1 << 190 }); + + try testType(@Vector(3, i192), .{ -1 << 191, -1, 0 }); + try testType(@Vector(3, u192), .{ 0, 1, 1 << 191 }); + try testType(@Vector(1, u192), .{ + 0xe7baafcb9781626a77571b0539b9471a60c97d6c02106c8b, + }); + try testType(@Vector(2, u192), .{ + 0xbc9510913ed09e2c2aa50ffab9f1bc7b303a87f36e232a83, + 0x1f37bee446d7712d1ad457c47a66812cb926198d052aee65, + }); + try testType(@Vector(4, u192), .{ + 0xdca6a7cfc19c69efc34022062a8ca36f2569ab3dce001202, + 0xd25a4529e621c9084181fdb6917c6a32eccc58b63601b35d, + 0x0a258afd6debbaf8c158f1caa61fed63b31871d13f51b43d, + 0x6b40a178674fcb82c623ac322f851623d5e993dac97a219a, + }); + + try testType(@Vector(3, i193), .{ -1 << 192, -1, 0 }); + try testType(@Vector(3, u193), .{ 0, 1, 1 << 192 }); + + try testType(@Vector(3, i255), .{ -1 << 254, -1, 0 }); + try testType(@Vector(3, u255), .{ 0, 1, 1 << 254 }); + + try 
testType(@Vector(3, i256), .{ -1 << 255, -1, 0 }); + try testType(@Vector(3, u256), .{ 0, 1, 1 << 255 }); + try testType(@Vector(1, u256), .{ + 0x230413bb481fa3a997796acf282010c560d1942e7339fd584a0f15a90c83fbda, + }); + try testType(@Vector(2, u256), .{ + 0x3ad569f8d91fdbc9da8ec0e933565919f2feb90b996c90c352b461aa0908e62d, + 0x0f109696d64647983f1f757042515510729ad1350e862cbf38cb73b5cf99f0f7, + }); + try testType(@Vector(4, u256), .{ + 0x1717c6ded4ac6de282d59f75f068da47d5a47a30f2c5053d2d59e715f9d28b97, + 0x3087189ce7540e2e0028b80af571ebc6353a00b2917f243a869ed29ecca0adaa, + 0x1507c6a9d104684bf503cdb08841cf91adab4644306bd67aafff5326604833ce, + 0x857e134ff9179733c871295b25f824bd3eb562977bad30890964fa0cdc15bb07, + }); + + try testType(@Vector(3, i257), .{ -1 << 256, -1, 0 }); + try testType(@Vector(3, u257), .{ 0, 1, 1 << 256 }); + + try testType(@Vector(3, i511), .{ -1 << 510, -1, 0 }); + try testType(@Vector(3, u511), .{ 0, 1, 1 << 510 }); + + try testType(@Vector(3, i512), .{ -1 << 511, -1, 0 }); + try testType(@Vector(3, u512), .{ 0, 1, 1 << 511 }); + try testType(@Vector(1, u512), .{ + 0xa3ff51a609f1370e5eeb96b05169bf7469e465cf76ac5b4ea8ffd166c1ba3cd94f2dedf0d647a1fe424f3a06e6d7940f03e257f28100970b00bd5528c52b9ae6, + }); + try testType(@Vector(2, u512), .{ + 0xc6d43cd46ae31ab71f9468a895c83bf17516c6b2f1c9b04b9aa113bf7fe1b789eb7d95fcf951f12a9a6f2124589551efdd8c00f528b366a7bfb852faf8f3da53, + 0xc9099d2bdf8d1a0d30485ec6db4a24cbc0d89a863de30e18313ee1d66f71dd2d26235caaa703286cf4a2b51e1a12ef96d2d944c66c0bd3f0d72dd4cf0fc8100e, + }); + + try testType(@Vector(3, i513), .{ -1 << 512, -1, 0 }); + try testType(@Vector(3, u513), .{ 0, 1, 1 << 512 }); + + try testType(@Vector(3, i1023), .{ -1 << 1022, -1, 0 }); + try testType(@Vector(3, u1023), .{ 0, 1, 1 << 1022 }); + + try testType(@Vector(3, i1024), .{ -1 << 1023, -1, 0 }); + try testType(@Vector(3, u1024), .{ 0, 1, 1 << 1023 }); + try testType(@Vector(1, u1024), .{ + 
0xc6cfaa6571139552e1f067402dfc131d9b9a58aafda97198a78764b05138fb68cf26f085b7652f3d5ae0e56aa21732f296a581bb411d4a73795c213de793489fa49b173b9f5c089aa6295ff1fcdc14d491a05035b45d08fc35cd67a83d887a02b8db512f07518132e0ba56533c7d6fbe958255eddf5649bd8aba288c0dd84a25, + }); + + try testType(@Vector(3, i1025), .{ -1 << 1024, -1, 0 }); + try testType(@Vector(3, u1025), .{ 0, 1, 1 << 1024 }); +} + fn testBinary(comptime op: anytype) !void { const testType = struct { fn testType(comptime Type: type, comptime imm_lhs: Type, comptime imm_rhs: Type) !void { @@ -306,6 +1012,63 @@ fn testBinary(comptime op: anytype) !void { 0x8b0b4a27fc94a0e90652d19bc755b63d, 0xa858bce5ad0e48c13588a4e170e8667c, }); + + try testType(@Vector(1, u256), .{ + 0x28df37e1f57a56133ba3f5b5b2164ce24eb6c29a8973a597fd91fbee8ab4bafb, + }, .{ + 0x63f725028cab082b5b1e6cb474428c8c3655cf438f3bb05c7a87f8270198f357, + }); + try testType(@Vector(2, u256), .{ + 0xcc79740b85597ef411e6d7e92049dfaa2328781ea4911540a3dcb512b71c7f3c, + 0x51ae46d2f93cbecff1578481f6ddc633dacee94ecaf81597c752c5c5db0ae766, + }, .{ + 0x257f0107305cb71cef582a9a58612a019f335e390d7998f51f5898f245874a6e, + 0x0a95a17323a4d16a715720f122b752785e9877e3dd3d3f9b72cdac3d1139a81f, + }); + try testType(@Vector(4, u256), .{ + 0x19667a6e269342cba437a8904c7ba42a762358d32723723ae2637b01124e63c5, + 0x14f7d3599a7edc7bcc46874f68d4291793e6ef72bd1f3763bc5e923f54f2f781, + 0x1c939de0ae980b80de773a04088ba45813441336cdfdc281ee356c98d71f653b, + 0x39f5d755965382fe13d1b1d6690b8e3827f153f8166768c4ad8a28a963b781f2, + }, .{ + 0xbe03de37cdcb8126083b4e86cd8a9803121d31b186fd5ce555ad77ce624dd6c7, + 0xa0c0730f0d7f141cc959849d09730b049f00693361539f1bc4758270554a60c1, + 0x2664bdba8de4eaa36ecee72f6bfec5b4daa6b4e00272d8116f2cc532c29490cc, + 0xe47a122bd45d5e7d69722d864a6b795ddee965a0993094f8791dd309d692de8b, + }); + + try testType(@Vector(1, u512), .{ + 0x651058c1d89a8f34cfc5e66b6d25294eecfcc4a7e1e4a356eb51ee7d7b2db25378e4afee51b7d18d16e520772a60c50a02d7966f40ced1870b32c658e5821397, + }, 
.{ + 0xd726e265ec80cb99510ba4f480ca64e959de5c528a7f54c386ecad22eeeefa845f0fd44b1bd64258a5f868197ee2d8fed59df9c9f0b72e74051a7ff20230880e, + }); + try testType(@Vector(2, u512), .{ + 0x22c8183c95cca8b09fdf541e431b73e9e4a1a5a00dff12381937fab52681d09d38ea25727d7025a2be08942cfa01535759e1644792e347c7901ec94b343c6337, + 0x292fdf644e75927e1aea9465ae2f60fb27550cd095f1afdea2cf7855286d26fbeed1c0b9c0474b73cb6b75621f7eadaa2f94ec358179ce2aaa0766df20da1ef3, + }, .{ + 0xe1cd8c0ca244c6626d4415e10b4ac43fa69e454c529c24fec4b13e6b945684d4ea833709c16c636ca78cffa5c5bf0fe945cd714a9ad695184a6bdad31dec9e31, + 0x8fa3d86099e9e2789d72f8e792290356d659ab20ac0414ff94745984c6ae7d986082197bb849889f912e896670aa2c1a11bd7e66e3f650710b0f0a18a1533f90, + }); + + try testType(@Vector(1, u1024), .{ + 0x0ca1a0dfaf8bb1da714b457d23c71aef948e66c7cd45c0aa941498a796fb18502ec32f34e885d0a107d44ae81595f8b52c2f0fb38e584b7139903a0e8a823ae20d01ca0662722dd474e7efc40f32d74cc065d97d8a09d0447f1ab6107fa0a57f3f8c866ae872506627ce82f18add79cee8dc69837f4ead3ca770c4d622d7e544, + }, .{ + 0xf1e3bbe031d59351770a7a501b6e969b2c00d144f17648db3f944b69dfeb7be72e5ff933a061eba4eaa422f8ca09e5a97d0b0dd740fd4076eba8c72d7a278523f399202dc2d043c4e0eb58a2bcd4066e2146e321810b1ee4d3afdddb4f026bcc7905ce17e033a7727b4e08f33b53c63d8c9f763fc6c31d0523eb38c30d5e40bc, + }); +} + +inline fn bitNot(comptime Type: type, rhs: Type) @TypeOf(~rhs) { + return ~rhs; +} +test bitNot { + try testUnary(bitNot); +} + +inline fn clz(comptime Type: type, rhs: Type) @TypeOf(@clz(rhs)) { + return @clz(rhs); +} +test clz { + try testUnary(clz); } inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) { diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig index da0aa3f565..065d40d3f4 100644 --- a/tools/update_cpu_features.zig +++ b/tools/update_cpu_features.zig @@ -902,8 +902,8 @@ const llvm_targets = [_]LlvmTarget{ .features = &.{ "v8a", "exynos" }, }, }, - // LLVM removed support for v2 and v3 but zig wants to support 
targeting old hardware .extra_features = &.{ + // LLVM removed support for v2 and v3 but zig wants to support targeting old hardware .{ .zig_name = "v2", .desc = "ARMv2 architecture", @@ -1043,10 +1043,22 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "64bit-mode", .omit = true, }, + .{ + .llvm_name = "alderlake", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "amdfam10", .extra_deps = &.{"3dnowa"}, }, + .{ + .llvm_name = "arrowlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "arrowlake-s", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "athlon", .extra_deps = &.{"3dnowa"}, @@ -1081,16 +1093,64 @@ const llvm_targets = [_]LlvmTarget{ }, .{ .llvm_name = "barcelona", - .extra_deps = &.{"3dnowa"}, + .extra_deps = &.{ "3dnowa", "smap", "smep" }, + }, + .{ + .llvm_name = "broadwell", + .extra_deps = &.{ "smap", "smep" }, }, .{ .llvm_name = "c3", .extra_deps = &.{"3dnow"}, }, + .{ + .llvm_name = "cannonlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "cascadelake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "emeraldrapids", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "geode", .extra_deps = &.{"3dnowa"}, }, + .{ + .llvm_name = "goldmont", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "goldmont_plus", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "haswell", + .extra_deps = &.{"smep"}, + }, + .{ + .llvm_name = "i386", + .extra_deps = &.{"bsf_bsr_0_clobbers_result"}, + }, + .{ + .llvm_name = "i486", + .extra_deps = &.{"bsf_bsr_0_clobbers_result"}, + }, + .{ + .llvm_name = "icelake_client", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "icelake_server", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "ivybridge", + .extra_deps = &.{"smep"}, + }, .{ .llvm_name = "k6-2", .extra_deps = &.{"3dnow"}, @@ -1127,6 +1187,10 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "lakemont", .extra_deps = 
&.{"soft_float"}, }, + .{ + .llvm_name = "meteorlake", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "opteron", .extra_deps = &.{"3dnowa"}, @@ -1135,6 +1199,38 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "opteron-sse3", .extra_deps = &.{"3dnowa"}, }, + .{ + .llvm_name = "raptorlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "rocketlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "sapphirerapids", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "silvermont", + .extra_deps = &.{"smep"}, + }, + .{ + .llvm_name = "skx", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "skylake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "skylake_avx512", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "tigerlake", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "winchip2", .extra_deps = &.{"3dnow"}, @@ -1143,9 +1239,29 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "sse4.2", .extra_deps = &.{"crc32"}, }, + .{ + .llvm_name = "znver1", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver2", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver3", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver4", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver5", + .extra_deps = &.{ "smap", "smep" }, + }, }, - // Features removed from LLVM .extra_features = &.{ + // Features removed from LLVM .{ .zig_name = "3dnow", .desc = "Enable 3DNow! 
instructions", @@ -1171,6 +1287,22 @@ const llvm_targets = [_]LlvmTarget{ .desc = "Prefetch with Intent to Write and T1 Hint", .deps = &.{}, }, + // Custom Zig features + .{ + .zig_name = "bsf_bsr_0_clobbers_result", + .desc = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero", + .deps = &.{}, + }, + .{ + .zig_name = "smap", + .desc = "Enable Supervisor Mode Access Prevention", + .deps = &.{}, + }, + .{ + .zig_name = "smep", + .desc = "Enable Supervisor Mode Execution Prevention", + .deps = &.{}, + }, }, .omit_cpus = &.{ // LLVM defines a bunch of dumb aliases with foreach loops in X86.td.