Merge pull request #22610 from jacobly0/x86_64-rewrite

x86_64: rewrite `@min`/`@max` for scalar floats
2025-12-06 05:44:20 +00:00 · 2025-01-27 11:47:52 -08:00 · 2025-01-27 11:47:52 -08:00 · eed3b9567d
commit eed3b9567d
parent df1fa36feb 654da648b3
7 changed files with 5452 additions and 488 deletions
--- a/lib/compiler_rt/extendxftf2.zig
+++ b/lib/compiler_rt/extendxftf2.zig
@ -15,8 +15,6 @@ fn __extendxftf2(a: f80) callconv(.C) f128 {

    const dst_bits = @bitSizeOf(f128);

-    const dst_min_normal = @as(u128, 1) << dst_sig_bits;
-
    // Break a into a sign and representation of the absolute value
    var a_rep = std.math.F80.fromFloat(a);
    const sign = a_rep.exp & 0x8000;
@ -36,12 +34,7 @@ fn __extendxftf2(a: f80) callconv(.C) f128 {
        abs_result |= @as(u128, a_rep.exp) << dst_sig_bits;
    } else {
        // a is denormal
-        // renormalize the significand and clear the leading bit and integer part,
-        // then insert the correct adjusted exponent in the destination type.
-        const scale: u32 = @clz(a_rep.fraction);
-        abs_result = @as(u128, a_rep.fraction) << @intCast(dst_sig_bits - src_sig_bits + scale + 1);
-        abs_result ^= dst_min_normal;
-        abs_result |= @as(u128, scale + 1) << dst_sig_bits;
+        abs_result = @as(u128, a_rep.fraction) << (dst_sig_bits - src_sig_bits);
    }

    // Apply the signbit to (dst_t)abs(a).
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@ -467,7 +467,7 @@ pub const Inst = struct {
        /// Mask ___ Quadword
        k_q,

-        pub fn fromCondition(cc: bits.Condition) Fixes {
+        pub fn fromCond(cc: bits.Condition) Fixes {
            return switch (cc) {
                inline else => |cc_tag| @field(Fixes, "_" ++ @tagName(cc_tag)),
                .z_and_np, .nz_or_p => unreachable,
--- a/src/arch/x86_64/bits.zig
+++ b/src/arch/x86_64/bits.zig
@ -177,6 +177,172 @@ pub const Condition = enum(u5) {
    }
 };

+/// The immediate operand of vcvtps2ph.
+///
+/// Bit layout of the 5-bit backing integer: `mode` is declared first and so
+/// occupies the low 4 bits; `precision` is the single bit above it. Both fields
+/// have defaults, so `.{}` means "use MXCSR.RC, normal precision".
+pub const RoundMode = packed struct(u5) {
+    // NOTE(review): the tags below only use the low 3 bits of `mode`, which places
+    // `precision` at bit 4 of the immediate. Intel's ROUNDxx imm8 layout puts the
+    // precision mask at bit 3 -- confirm bit 4 is intended if this type is also
+    // used for those instructions.
+    mode: enum(u4) {
+        /// Round to nearest (even)
+        nearest = 0b0_00,
+        /// Round down (toward -∞)
+        down = 0b0_01,
+        /// Round up (toward +∞)
+        up = 0b0_10,
+        /// Round toward zero (truncate)
+        zero = 0b0_11,
+        /// Use current rounding mode of MXCSR.RC
+        mxcsr = 0b1_00,
+    } = .mxcsr,
+    /// Single-bit precision selector; defaults to `.normal` (bit clear).
+    precision: enum(u1) {
+        normal = 0b0,
+        inexact = 0b1,
+    } = .normal,
+
+    /// Reinterprets this struct as its backing integer and wraps it with
+    /// `Immediate.u` (presumably the unsigned-immediate constructor; `Immediate`
+    /// is declared outside this hunk).
+    pub fn imm(mode: RoundMode) Immediate {
+        return .u(@as(@typeInfo(RoundMode).@"struct".backing_integer.?, @bitCast(mode)));
+    }
+};
+
+/// The immediate operand of cmppd, cmpps, cmpsd, and cmpss.
+///
+/// Tags carry no explicit values, so they take the ordinals 0 through 7 in
+/// declaration order; that ordinal is what `imm` emits. Do not reorder them.
+pub const SseFloatPredicate = enum(u3) {
+    /// Equal (ordered, non-signaling)
+    eq,
+    /// Less-than (ordered, signaling)
+    lt,
+    /// Less-than-or-equal (ordered, signaling)
+    le,
+    /// Unordered (non-signaling)
+    unord,
+    /// Not-equal (unordered, non-signaling)
+    neq,
+    /// Not-less-than (unordered, signaling)
+    nlt,
+    /// Not-less-than-or-equal (unordered, signaling)
+    nle,
+    /// Ordered (non-signaling)
+    ord,
+
+    // Suffixed aliases: the same eight predicates under the names that
+    // `VexFloatPredicate` uses for its first eight tags.
+
+    /// Equal (ordered, non-signaling)
+    pub const eq_oq: SseFloatPredicate = .eq;
+    /// Less-than (ordered, signaling)
+    pub const lt_os: SseFloatPredicate = .lt;
+    /// Less-than-or-equal (ordered, signaling)
+    pub const le_os: SseFloatPredicate = .le;
+    /// Unordered (non-signaling)
+    pub const unord_q: SseFloatPredicate = .unord;
+    /// Not-equal (unordered, non-signaling)
+    pub const neq_uq: SseFloatPredicate = .neq;
+    /// Not-less-than (unordered, signaling)
+    pub const nlt_us: SseFloatPredicate = .nlt;
+    /// Not-less-than-or-equal (unordered, signaling)
+    pub const nle_us: SseFloatPredicate = .nle;
+    /// Ordered (non-signaling)
+    pub const ord_q: SseFloatPredicate = .ord;
+
+    /// Wraps the predicate's integer tag value with `Immediate.u` (presumably
+    /// the unsigned-immediate constructor; `Immediate` is declared outside this
+    /// hunk).
+    pub fn imm(pred: SseFloatPredicate) Immediate {
+        return .u(@intFromEnum(pred));
+    }
+};
+
+/// The immediate operand of vcmppd, vcmpps, vcmpsd, and vcmpss.
+///
+/// Tags carry no explicit values, so the 32 tags take the ordinals 0 through 31
+/// in declaration order; that ordinal is what `imm` emits. Do not reorder them.
+/// Suffix key, as used in the per-tag comments: `o`/`u` = ordered/unordered,
+/// `q`/`s` = non-signaling (quiet)/signaling.
+pub const VexFloatPredicate = enum(u5) {
+    /// Equal (ordered, non-signaling)
+    eq_oq,
+    /// Less-than (ordered, signaling)
+    lt_os,
+    /// Less-than-or-equal (ordered, signaling)
+    le_os,
+    /// Unordered (non-signaling)
+    unord_q,
+    /// Not-equal (unordered, non-signaling)
+    neq_uq,
+    /// Not-less-than (unordered, signaling)
+    nlt_us,
+    /// Not-less-than-or-equal (unordered, signaling)
+    nle_us,
+    /// Ordered (non-signaling)
+    ord_q,
+    /// Equal (unordered, non-signaling)
+    eq_uq,
+    /// Not-greater-than-or-equal (unordered, signaling)
+    nge_us,
+    /// Not-greater-than (unordered, signaling)
+    ngt_us,
+    /// False (ordered, non-signaling)
+    false_oq,
+    /// Not-equal (ordered, non-signaling)
+    neq_oq,
+    /// Greater-than-or-equal (ordered, signaling)
+    ge_os,
+    /// Greater-than (ordered, signaling)
+    gt_os,
+    /// True (unordered, non-signaling)
+    true_uq,
+    /// Equal (ordered, signaling)
+    eq_os,
+    /// Less-than (ordered, non-signaling)
+    lt_oq,
+    /// Less-than-or-equal (ordered, non-signaling)
+    le_oq,
+    /// Unordered (signaling)
+    unord_s,
+    /// Not-equal (unordered, signaling)
+    neq_us,
+    /// Not-less-than (unordered, non-signaling)
+    nlt_uq,
+    /// Not-less-than-or-equal (unordered, non-signaling)
+    nle_uq,
+    /// Ordered (signaling)
+    ord_s,
+    /// Equal (unordered, signaling)
+    eq_us,
+    /// Not-greater-than-or-equal (unordered, non-signaling)
+    nge_uq,
+    /// Not-greater-than (unordered, non-signaling)
+    ngt_uq,
+    /// False (ordered, signaling)
+    false_os,
+    /// Not-equal (ordered, signaling)
+    neq_os,
+    /// Greater-than-or-equal (ordered, non-signaling)
+    ge_oq,
+    /// Greater-than (ordered, non-signaling)
+    gt_oq,
+    /// True (unordered, signaling)
+    true_us,
+
+    // Unsuffixed aliases for 14 of the first 16 tags. `eq_uq` and `neq_oq` have
+    // no alias because `eq` and `neq` already name `eq_oq` and `neq_uq`.
+
+    /// Equal (ordered, non-signaling)
+    pub const eq: VexFloatPredicate = .eq_oq;
+    /// Less-than (ordered, signaling)
+    pub const lt: VexFloatPredicate = .lt_os;
+    /// Less-than-or-equal (ordered, signaling)
+    pub const le: VexFloatPredicate = .le_os;
+    /// Unordered (non-signaling)
+    pub const unord: VexFloatPredicate = .unord_q;
+    /// Not-equal (unordered, non-signaling)
+    pub const neq: VexFloatPredicate = .neq_uq;
+    /// Not-less-than (unordered, signaling)
+    pub const nlt: VexFloatPredicate = .nlt_us;
+    /// Not-less-than-or-equal (unordered, signaling)
+    pub const nle: VexFloatPredicate = .nle_us;
+    /// Ordered (non-signaling)
+    pub const ord: VexFloatPredicate = .ord_q;
+    /// Not-greater-than-or-equal (unordered, signaling)
+    pub const nge: VexFloatPredicate = .nge_us;
+    /// Not-greater-than (unordered, signaling)
+    pub const ngt: VexFloatPredicate = .ngt_us;
+    /// False (ordered, non-signaling)
+    pub const @"false": VexFloatPredicate = .false_oq;
+    /// Greater-than-or-equal (ordered, signaling)
+    pub const ge: VexFloatPredicate = .ge_os;
+    /// Greater-than (ordered, signaling)
+    pub const gt: VexFloatPredicate = .gt_os;
+    /// True (unordered, non-signaling)
+    pub const @"true": VexFloatPredicate = .true_uq;
+
+    /// Wraps the predicate's integer tag value with `Immediate.u` (presumably
+    /// the unsigned-immediate constructor; `Immediate` is declared outside this
+    /// hunk).
+    pub fn imm(pred: VexFloatPredicate) Immediate {
+        return .u(@intFromEnum(pred));
+    }
+};
+
 pub const Register = enum(u8) {
    // zig fmt: off
    rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
--- a/test/behavior/maximum_minimum.zig
+++ b/test/behavior/maximum_minimum.zig
@ -121,8 +121,8 @@ test "@min/max for floats" {
    if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;

    const S = struct {
        fn doTheTest(comptime T: type) !void {
--- a/test/behavior/x86_64/build.zig
+++ b/test/behavior/x86_64/build.zig
@ -35,6 +35,12 @@ pub fn build(b: *std.Build) void {
                .sse2,
            }),
        },
+        .{
+            .cpu_arch = .x86_64,
+            .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
+            .cpu_features_add = std.Target.x86.featureSet(&.{.sahf}),
+            .cpu_features_sub = std.Target.x86.featureSet(&.{.cmov}),
+        },
        //.{
        //    .cpu_arch = .x86_64,
        //    .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 },
--- a/test/behavior/x86_64/math.zig
+++ b/test/behavior/x86_64/math.zig