stage2: implement @setFloatMode

This commit is contained in:
Veikka Tuominen 2022-07-21 14:40:00 +03:00
parent 585c160c20
commit d75fa86d70
14 changed files with 494 additions and 122 deletions

View file

@ -813,6 +813,7 @@ test "conversion to f32" {
test "conversion to f80" { test "conversion to f80" {
if (builtin.zig_backend == .stage1 and builtin.cpu.arch != .x86_64) if (builtin.zig_backend == .stage1 and builtin.cpu.arch != .x86_64)
return error.SkipZigTest; // https://github.com/ziglang/zig/issues/11408 return error.SkipZigTest; // https://github.com/ziglang/zig/issues/11408
if (std.debug.runtime_safety) return error.SkipZigTest;
const intToFloat = @import("./int_to_float.zig").intToFloat; const intToFloat = @import("./int_to_float.zig").intToFloat;

View file

@ -38,11 +38,15 @@ pub const Inst = struct {
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
add, add,
/// Same as `add` with optimized float mode.
add_optimized,
/// Integer addition. Wrapping is defined to be twos complement wrapping. /// Integer addition. Wrapping is defined to be twos complement wrapping.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
addwrap, addwrap,
/// Same as `addwrap` with optimized float mode.
addwrap_optimized,
/// Saturating integer addition. /// Saturating integer addition.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
@ -53,11 +57,15 @@ pub const Inst = struct {
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
sub, sub,
/// Same as `sub` with optimized float mode.
sub_optimized,
/// Integer subtraction. Wrapping is defined to be twos complement wrapping. /// Integer subtraction. Wrapping is defined to be twos complement wrapping.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
subwrap, subwrap,
/// Same as `subwrap` with optimized float mode.
subwrap_optimized,
/// Saturating integer subtraction. /// Saturating integer subtraction.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
@ -68,11 +76,15 @@ pub const Inst = struct {
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
mul, mul,
/// Same as `mul` with optimized float mode.
mul_optimized,
/// Integer multiplication. Wrapping is defined to be twos complement wrapping. /// Integer multiplication. Wrapping is defined to be twos complement wrapping.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
mulwrap, mulwrap,
/// Same as `mulwrap` with optimized float mode.
mulwrap_optimized,
/// Saturating integer multiplication. /// Saturating integer multiplication.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
@ -83,32 +95,44 @@ pub const Inst = struct {
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
div_float, div_float,
/// Same as `div_float` with optimized float mode.
div_float_optimized,
/// Truncating integer or float division. For integers, wrapping is undefined behavior. /// Truncating integer or float division. For integers, wrapping is undefined behavior.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
div_trunc, div_trunc,
/// Same as `div_trunc` with optimized float mode.
div_trunc_optimized,
/// Flooring integer or float division. For integers, wrapping is undefined behavior. /// Flooring integer or float division. For integers, wrapping is undefined behavior.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
div_floor, div_floor,
/// Same as `div_floor` with optimized float mode.
div_floor_optimized,
/// Integer or float division. Guaranteed no remainder. /// Integer or float division. Guaranteed no remainder.
/// For integers, wrapping is undefined behavior. /// For integers, wrapping is undefined behavior.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
div_exact, div_exact,
/// Same as `div_exact` with optimized float mode.
div_exact_optimized,
/// Integer or float remainder division. /// Integer or float remainder division.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
rem, rem,
/// Same as `rem` with optimized float mode.
rem_optimized,
/// Integer or float modulus division. /// Integer or float modulus division.
/// Both operands are guaranteed to be the same type, and the result type /// Both operands are guaranteed to be the same type, and the result type
/// is the same as both operands. /// is the same as both operands.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
mod, mod,
/// Same as `mod` with optimized float mode.
mod_optimized,
/// Add an offset to a pointer, returning a new pointer. /// Add an offset to a pointer, returning a new pointer.
/// The offset is in element type units, not bytes. /// The offset is in element type units, not bytes.
/// Wrapping is undefined behavior. /// Wrapping is undefined behavior.
@ -293,29 +317,45 @@ pub const Inst = struct {
/// LHS of zero. /// LHS of zero.
/// Uses the `un_op` field. /// Uses the `un_op` field.
neg, neg,
/// Same as `neg` with optimized float mode.
neg_optimized,
/// `<`. Result type is always bool. /// `<`. Result type is always bool.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
cmp_lt, cmp_lt,
/// Same as `cmp_lt` with optimized float mode.
cmp_lt_optimized,
/// `<=`. Result type is always bool. /// `<=`. Result type is always bool.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
cmp_lte, cmp_lte,
/// Same as `cmp_lte` with optimized float mode.
cmp_lte_optimized,
/// `==`. Result type is always bool. /// `==`. Result type is always bool.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
cmp_eq, cmp_eq,
/// Same as `cmp_eq` with optimized float mode.
cmp_eq_optimized,
/// `>=`. Result type is always bool. /// `>=`. Result type is always bool.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
cmp_gte, cmp_gte,
/// Same as `cmp_gte` with optimized float mode.
cmp_gte_optimized,
/// `>`. Result type is always bool. /// `>`. Result type is always bool.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
cmp_gt, cmp_gt,
/// Same as `cmp_gt` with optimized float mode.
cmp_gt_optimized,
/// `!=`. Result type is always bool. /// `!=`. Result type is always bool.
/// Uses the `bin_op` field. /// Uses the `bin_op` field.
cmp_neq, cmp_neq,
/// Same as `cmp_neq` with optimized float mode.
cmp_neq_optimized,
/// Conditional between two vectors. /// Conditional between two vectors.
/// Result type is always a vector of bools. /// Result type is always a vector of bools.
/// Uses the `ty_pl` field, payload is `VectorCmp`. /// Uses the `ty_pl` field, payload is `VectorCmp`.
cmp_vector, cmp_vector,
/// Same as `cmp_vector` with optimized float mode.
cmp_vector_optimized,
/// Conditional branch. /// Conditional branch.
/// Result type is always noreturn; no instructions in a block follow this one. /// Result type is always noreturn; no instructions in a block follow this one.
@ -553,6 +593,8 @@ pub const Inst = struct {
/// Given a float operand, return the integer with the closest mathematical meaning. /// Given a float operand, return the integer with the closest mathematical meaning.
/// Uses the `ty_op` field. /// Uses the `ty_op` field.
float_to_int, float_to_int,
/// Same as `float_to_int` with optimized float mode.
float_to_int_optimized,
/// Given an integer operand, return the float with the closest mathematical meaning. /// Given an integer operand, return the float with the closest mathematical meaning.
/// Uses the `ty_op` field. /// Uses the `ty_op` field.
int_to_float, int_to_float,
@ -564,6 +606,8 @@ pub const Inst = struct {
/// * min, max, add, mul => integer or float /// * min, max, add, mul => integer or float
/// Uses the `reduce` field. /// Uses the `reduce` field.
reduce, reduce,
/// Same as `reduce` with optimized float mode.
reduce_optimized,
/// Given an integer, bool, float, or pointer operand, return a vector with all elements /// Given an integer, bool, float, or pointer operand, return a vector with all elements
/// equal to the scalar value. /// equal to the scalar value.
/// Uses the `ty_op` field. /// Uses the `ty_op` field.
@ -676,25 +720,25 @@ pub const Inst = struct {
/// Sets the operand as the current error return trace, /// Sets the operand as the current error return trace,
set_err_return_trace, set_err_return_trace,
pub fn fromCmpOp(op: std.math.CompareOperator) Tag { pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag {
return switch (op) { switch (op) {
.lt => .cmp_lt, .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt,
.lte => .cmp_lte, .lte => return if (optimized) .cmp_lte_optimized else .cmp_lte,
.eq => .cmp_eq, .eq => return if (optimized) .cmp_eq_optimized else .cmp_eq,
.gte => .cmp_gte, .gte => return if (optimized) .cmp_gte_optimized else .cmp_gte,
.gt => .cmp_gt, .gt => return if (optimized) .cmp_gt_optimized else .cmp_gt,
.neq => .cmp_neq, .neq => return if (optimized) .cmp_neq_optimized else .cmp_neq,
}; }
} }
pub fn toCmpOp(tag: Tag) ?std.math.CompareOperator { pub fn toCmpOp(tag: Tag) ?std.math.CompareOperator {
return switch (tag) { return switch (tag) {
.cmp_lt => .lt, .cmp_lt, .cmp_lt_optimized => .lt,
.cmp_lte => .lte, .cmp_lte, .cmp_lte_optimized => .lte,
.cmp_eq => .eq, .cmp_eq, .cmp_eq_optimized => .eq,
.cmp_gte => .gte, .cmp_gte, .cmp_gte_optimized => .gte,
.cmp_gt => .gt, .cmp_gt, .cmp_gt_optimized => .gt,
.cmp_neq => .neq, .cmp_neq, .cmp_neq_optimized => .neq,
else => null, else => null,
}; };
} }
@ -959,6 +1003,18 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
.max, .max,
.bool_and, .bool_and,
.bool_or, .bool_or,
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
=> return air.typeOf(datas[inst].bin_op.lhs), => return air.typeOf(datas[inst].bin_op.lhs),
.sqrt, .sqrt,
@ -976,6 +1032,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
.round, .round,
.trunc_float, .trunc_float,
.neg, .neg,
.neg_optimized,
=> return air.typeOf(datas[inst].un_op), => return air.typeOf(datas[inst].un_op),
.cmp_lt, .cmp_lt,
@ -984,6 +1041,12 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
.cmp_gte, .cmp_gte,
.cmp_gt, .cmp_gt,
.cmp_neq, .cmp_neq,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_lt_errors_len, .cmp_lt_errors_len,
.is_null, .is_null,
.is_non_null, .is_non_null,
@ -1018,6 +1081,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
.union_init, .union_init,
.field_parent_ptr, .field_parent_ptr,
.cmp_vector, .cmp_vector,
.cmp_vector_optimized,
.add_with_overflow, .add_with_overflow,
.sub_with_overflow, .sub_with_overflow,
.mul_with_overflow, .mul_with_overflow,
@ -1054,6 +1118,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
.struct_field_ptr_index_3, .struct_field_ptr_index_3,
.array_to_slice, .array_to_slice,
.float_to_int, .float_to_int,
.float_to_int_optimized,
.int_to_float, .int_to_float,
.splat, .splat,
.get_union_tag, .get_union_tag,
@ -1129,7 +1194,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type {
return ptr_ty.elemType(); return ptr_ty.elemType();
}, },
.reduce => return air.typeOf(datas[inst].reduce.operand).childType(), .reduce, .reduce_optimized => return air.typeOf(datas[inst].reduce.operand).childType(),
.mul_add => return air.typeOf(datas[inst].pl_op.operand), .mul_add => return air.typeOf(datas[inst].pl_op.operand),
.select => { .select => {

View file

@ -173,6 +173,25 @@ pub fn categorizeOperand(
.shr_exact, .shr_exact,
.min, .min,
.max, .max,
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
=> { => {
const o = air_datas[inst].bin_op; const o = air_datas[inst].bin_op;
if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
@ -239,6 +258,7 @@ pub fn categorizeOperand(
.struct_field_ptr_index_3, .struct_field_ptr_index_3,
.array_to_slice, .array_to_slice,
.float_to_int, .float_to_int,
.float_to_int_optimized,
.int_to_float, .int_to_float,
.get_union_tag, .get_union_tag,
.clz, .clz,
@ -381,12 +401,12 @@ pub fn categorizeOperand(
if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none); if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
return .none; return .none;
}, },
.reduce => { .reduce, .reduce_optimized => {
const reduce = air_datas[inst].reduce; const reduce = air_datas[inst].reduce;
if (reduce.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); if (reduce.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
return .none; return .none;
}, },
.cmp_vector => { .cmp_vector, .cmp_vector_optimized => {
const extra = air.extraData(Air.VectorCmp, air_datas[inst].ty_pl.payload).data; const extra = air.extraData(Air.VectorCmp, air_datas[inst].ty_pl.payload).data;
if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none); if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
@ -701,29 +721,47 @@ fn analyzeInst(
switch (inst_tags[inst]) { switch (inst_tags[inst]) {
.add, .add,
.add_optimized,
.addwrap, .addwrap,
.addwrap_optimized,
.add_sat, .add_sat,
.sub, .sub,
.sub_optimized,
.subwrap, .subwrap,
.subwrap_optimized,
.sub_sat, .sub_sat,
.mul, .mul,
.mul_optimized,
.mulwrap, .mulwrap,
.mulwrap_optimized,
.mul_sat, .mul_sat,
.div_float, .div_float,
.div_float_optimized,
.div_trunc, .div_trunc,
.div_trunc_optimized,
.div_floor, .div_floor,
.div_floor_optimized,
.div_exact, .div_exact,
.div_exact_optimized,
.rem, .rem,
.rem_optimized,
.mod, .mod,
.mod_optimized,
.bit_and, .bit_and,
.bit_or, .bit_or,
.xor, .xor,
.cmp_lt, .cmp_lt,
.cmp_lt_optimized,
.cmp_lte, .cmp_lte,
.cmp_lte_optimized,
.cmp_eq, .cmp_eq,
.cmp_eq_optimized,
.cmp_gte, .cmp_gte,
.cmp_gte_optimized,
.cmp_gt, .cmp_gt,
.cmp_gt_optimized,
.cmp_neq, .cmp_neq,
.cmp_neq_optimized,
.bool_and, .bool_and,
.bool_or, .bool_or,
.store, .store,
@ -794,6 +832,7 @@ fn analyzeInst(
.struct_field_ptr_index_3, .struct_field_ptr_index_3,
.array_to_slice, .array_to_slice,
.float_to_int, .float_to_int,
.float_to_int_optimized,
.int_to_float, .int_to_float,
.get_union_tag, .get_union_tag,
.clz, .clz,
@ -836,6 +875,7 @@ fn analyzeInst(
.round, .round,
.trunc_float, .trunc_float,
.neg, .neg,
.neg_optimized,
.cmp_lt_errors_len, .cmp_lt_errors_len,
.set_err_return_trace, .set_err_return_trace,
=> { => {
@ -903,11 +943,11 @@ fn analyzeInst(
const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data; const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data;
return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none }); return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none });
}, },
.reduce => { .reduce, .reduce_optimized => {
const reduce = inst_datas[inst].reduce; const reduce = inst_datas[inst].reduce;
return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none }); return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none });
}, },
.cmp_vector => { .cmp_vector, .cmp_vector_optimized => {
const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data; const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data;
return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none }); return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none });
}, },

View file

@ -144,6 +144,9 @@ pub const Block = struct {
/// when null, it is determined by build mode, changed by @setRuntimeSafety /// when null, it is determined by build mode, changed by @setRuntimeSafety
want_safety: ?bool = null, want_safety: ?bool = null,
/// What mode to generate float operations in, set by @setFloatMode
float_mode: std.builtin.FloatMode = .Strict,
c_import_buf: ?*std.ArrayList(u8) = null, c_import_buf: ?*std.ArrayList(u8) = null,
/// type of `err` in `else => |err|` /// type of `err` in `else => |err|`
@ -206,6 +209,7 @@ pub const Block = struct {
.runtime_loop = parent.runtime_loop, .runtime_loop = parent.runtime_loop,
.runtime_index = parent.runtime_index, .runtime_index = parent.runtime_index,
.want_safety = parent.want_safety, .want_safety = parent.want_safety,
.float_mode = parent.float_mode,
.c_import_buf = parent.c_import_buf, .c_import_buf = parent.c_import_buf,
.switch_else_err_ty = parent.switch_else_err_ty, .switch_else_err_ty = parent.switch_else_err_ty,
}; };
@ -414,7 +418,7 @@ pub const Block = struct {
fn addCmpVector(block: *Block, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref, cmp_op: std.math.CompareOperator, vector_ty: Air.Inst.Ref) !Air.Inst.Ref { fn addCmpVector(block: *Block, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref, cmp_op: std.math.CompareOperator, vector_ty: Air.Inst.Ref) !Air.Inst.Ref {
return block.addInst(.{ return block.addInst(.{
.tag = .cmp_vector, .tag = if (block.float_mode == .Optimized) .cmp_vector_optimized else .cmp_vector,
.data = .{ .ty_pl = .{ .data = .{ .ty_pl = .{
.ty = vector_ty, .ty = vector_ty,
.payload = try block.sema.addExtra(Air.VectorCmp{ .payload = try block.sema.addExtra(Air.VectorCmp{
@ -714,10 +718,10 @@ fn analyzeBodyInner(
.closure_get => try sema.zirClosureGet(block, inst), .closure_get => try sema.zirClosureGet(block, inst),
.cmp_lt => try sema.zirCmp(block, inst, .lt), .cmp_lt => try sema.zirCmp(block, inst, .lt),
.cmp_lte => try sema.zirCmp(block, inst, .lte), .cmp_lte => try sema.zirCmp(block, inst, .lte),
.cmp_eq => try sema.zirCmpEq(block, inst, .eq, .cmp_eq), .cmp_eq => try sema.zirCmpEq(block, inst, .eq, Air.Inst.Tag.fromCmpOp(.eq, block.float_mode == .Optimized)),
.cmp_gte => try sema.zirCmp(block, inst, .gte), .cmp_gte => try sema.zirCmp(block, inst, .gte),
.cmp_gt => try sema.zirCmp(block, inst, .gt), .cmp_gt => try sema.zirCmp(block, inst, .gt),
.cmp_neq => try sema.zirCmpEq(block, inst, .neq, .cmp_neq), .cmp_neq => try sema.zirCmpEq(block, inst, .neq, Air.Inst.Tag.fromCmpOp(.neq, block.float_mode == .Optimized)),
.coerce_result_ptr => try sema.zirCoerceResultPtr(block, inst), .coerce_result_ptr => try sema.zirCoerceResultPtr(block, inst),
.decl_ref => try sema.zirDeclRef(block, inst), .decl_ref => try sema.zirDeclRef(block, inst),
.decl_val => try sema.zirDeclVal(block, inst), .decl_val => try sema.zirDeclVal(block, inst),
@ -4705,6 +4709,7 @@ fn zirBlock(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErro
.inlining = parent_block.inlining, .inlining = parent_block.inlining,
.is_comptime = parent_block.is_comptime, .is_comptime = parent_block.is_comptime,
.want_safety = parent_block.want_safety, .want_safety = parent_block.want_safety,
.float_mode = parent_block.float_mode,
}; };
defer child_block.instructions.deinit(gpa); defer child_block.instructions.deinit(gpa);
@ -5042,13 +5047,7 @@ fn zirSetCold(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!voi
fn zirSetFloatMode(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void { fn zirSetFloatMode(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void {
const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data; const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data;
const src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; const src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node };
const float_mode = try sema.resolveBuiltinEnum(block, src, extra.operand, "FloatMode", "operand to @setFloatMode must be comptime known"); block.float_mode = try sema.resolveBuiltinEnum(block, src, extra.operand, "FloatMode", "operand to @setFloatMode must be comptime known");
switch (float_mode) {
.Strict => return,
.Optimized => {
// TODO implement optimized float mode
},
}
} }
fn zirSetRuntimeSafety(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { fn zirSetRuntimeSafety(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void {
@ -8092,7 +8091,7 @@ fn intCast(
const ok = if (is_vector) ok: { const ok = if (is_vector) ok: {
const is_in_range = try block.addCmpVector(diff_unsigned, dest_range, .lte, try sema.addType(operand_ty)); const is_in_range = try block.addCmpVector(diff_unsigned, dest_range, .lte, try sema.addType(operand_ty));
const all_in_range = try block.addInst(.{ const all_in_range = try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = is_in_range, .operand = is_in_range,
.operation = .And, .operation = .And,
@ -8109,7 +8108,7 @@ fn intCast(
const ok = if (is_vector) ok: { const ok = if (is_vector) ok: {
const is_in_range = try block.addCmpVector(diff, dest_max, .lte, try sema.addType(operand_ty)); const is_in_range = try block.addCmpVector(diff, dest_max, .lte, try sema.addType(operand_ty));
const all_in_range = try block.addInst(.{ const all_in_range = try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = is_in_range, .operand = is_in_range,
.operation = .And, .operation = .And,
@ -8130,7 +8129,7 @@ fn intCast(
const zero_inst = try sema.addConstant(operand_ty, zero_val); const zero_inst = try sema.addConstant(operand_ty, zero_val);
const is_in_range = try block.addCmpVector(operand, zero_inst, .gte, try sema.addType(operand_ty)); const is_in_range = try block.addCmpVector(operand, zero_inst, .gte, try sema.addType(operand_ty));
const all_in_range = try block.addInst(.{ const all_in_range = try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = is_in_range, .operand = is_in_range,
.operation = .And, .operation = .And,
@ -9391,7 +9390,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
} else { } else {
for (items) |item_ref| { for (items) |item_ref| {
const item = try sema.resolveInst(item_ref); const item = try sema.resolveInst(item_ref);
const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item); const cmp_ok = try case_block.addBinOp(if (case_block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, operand, item);
if (any_ok != .none) { if (any_ok != .none) {
any_ok = try case_block.addBinOp(.bool_or, any_ok, cmp_ok); any_ok = try case_block.addBinOp(.bool_or, any_ok, cmp_ok);
} else { } else {
@ -9411,12 +9410,12 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
// operand >= first and operand <= last // operand >= first and operand <= last
const range_first_ok = try case_block.addBinOp( const range_first_ok = try case_block.addBinOp(
.cmp_gte, if (case_block.float_mode == .Optimized) .cmp_gte_optimized else .cmp_gte,
operand, operand,
item_first, item_first,
); );
const range_last_ok = try case_block.addBinOp( const range_last_ok = try case_block.addBinOp(
.cmp_lte, if (case_block.float_mode == .Optimized) .cmp_lte_optimized else .cmp_lte,
operand, operand,
item_last, item_last,
); );
@ -10023,7 +10022,7 @@ fn zirShl(
const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty);
const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector) const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector)
try block.addInst(.{ try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = ov_bit, .operand = ov_bit,
.operation = .Or, .operation = .Or,
@ -10120,7 +10119,7 @@ fn zirShr(
const ok = if (rhs_ty.zigTypeTag() == .Vector) ok: { const ok = if (rhs_ty.zigTypeTag() == .Vector) ok: {
const eql = try block.addCmpVector(lhs, back, .eq, try sema.addType(rhs_ty)); const eql = try block.addCmpVector(lhs, back, .eq, try sema.addType(rhs_ty));
break :ok try block.addInst(.{ break :ok try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = eql, .operand = eql,
.operation = .And, .operation = .And,
@ -10719,7 +10718,7 @@ fn zirNegate(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
return sema.addConstant(rhs_ty, try rhs_val.floatNeg(rhs_ty, sema.arena, target)); return sema.addConstant(rhs_ty, try rhs_val.floatNeg(rhs_ty, sema.arena, target));
} }
try sema.requireRuntimeBlock(block, src, null); try sema.requireRuntimeBlock(block, src, null);
return block.addUnOp(.neg, rhs); return block.addUnOp(if (block.float_mode == .Optimized) .neg_optimized else .neg, rhs);
} }
const lhs = if (rhs_ty.zigTypeTag() == .Vector) const lhs = if (rhs_ty.zigTypeTag() == .Vector)
@ -11078,6 +11077,7 @@ fn analyzeArithmetic(
return casted_lhs; return casted_lhs;
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .add_optimized else .add;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
if (is_int) { if (is_int) {
@ -11100,8 +11100,8 @@ fn analyzeArithmetic(
try sema.floatAdd(lhs_val, rhs_val, resolved_type), try sema.floatAdd(lhs_val, rhs_val, resolved_type),
); );
} }
} else break :rs .{ .src = rhs_src, .air_tag = .add }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .add }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.addwrap => { .addwrap => {
// Integers only; floats are checked above. // Integers only; floats are checked above.
@ -11112,6 +11112,7 @@ fn analyzeArithmetic(
return casted_rhs; return casted_rhs;
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .addwrap_optimized else .addwrap;
if (maybe_rhs_val) |rhs_val| { if (maybe_rhs_val) |rhs_val| {
if (rhs_val.isUndef()) { if (rhs_val.isUndef()) {
return sema.addConstUndef(resolved_type); return sema.addConstUndef(resolved_type);
@ -11124,8 +11125,8 @@ fn analyzeArithmetic(
resolved_type, resolved_type,
try sema.numberAddWrap(block, src, lhs_val, rhs_val, resolved_type), try sema.numberAddWrap(block, src, lhs_val, rhs_val, resolved_type),
); );
} else break :rs .{ .src = lhs_src, .air_tag = .addwrap }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
} else break :rs .{ .src = rhs_src, .air_tag = .addwrap }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
}, },
.add_sat => { .add_sat => {
// Integers only; floats are checked above. // Integers only; floats are checked above.
@ -11173,6 +11174,7 @@ fn analyzeArithmetic(
return casted_lhs; return casted_lhs;
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .sub_optimized else .sub;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
if (is_int) { if (is_int) {
@ -11195,8 +11197,8 @@ fn analyzeArithmetic(
try sema.floatSub(lhs_val, rhs_val, resolved_type), try sema.floatSub(lhs_val, rhs_val, resolved_type),
); );
} }
} else break :rs .{ .src = rhs_src, .air_tag = .sub }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .sub }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.subwrap => { .subwrap => {
// Integers only; floats are checked above. // Integers only; floats are checked above.
@ -11210,6 +11212,7 @@ fn analyzeArithmetic(
return casted_lhs; return casted_lhs;
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .subwrap_optimized else .subwrap;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
return sema.addConstUndef(resolved_type); return sema.addConstUndef(resolved_type);
@ -11219,8 +11222,8 @@ fn analyzeArithmetic(
resolved_type, resolved_type,
try sema.numberSubWrap(block, src, lhs_val, rhs_val, resolved_type), try sema.numberSubWrap(block, src, lhs_val, rhs_val, resolved_type),
); );
} else break :rs .{ .src = rhs_src, .air_tag = .subwrap }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .subwrap }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.sub_sat => { .sub_sat => {
// Integers only; floats are checked above. // Integers only; floats are checked above.
@ -11327,14 +11330,14 @@ fn analyzeArithmetic(
if (is_int) { if (is_int) {
break :rs .{ .src = rhs_src, .air_tag = .div_trunc }; break :rs .{ .src = rhs_src, .air_tag = .div_trunc };
} else { } else {
break :rs .{ .src = rhs_src, .air_tag = .div_float }; break :rs .{ .src = rhs_src, .air_tag = if (block.float_mode == .Optimized) .div_float_optimized else .div_float };
} }
} }
} else { } else {
if (is_int) { if (is_int) {
break :rs .{ .src = lhs_src, .air_tag = .div_trunc }; break :rs .{ .src = lhs_src, .air_tag = .div_trunc };
} else { } else {
break :rs .{ .src = lhs_src, .air_tag = .div_float }; break :rs .{ .src = lhs_src, .air_tag = if (block.float_mode == .Optimized) .div_float_optimized else .div_float };
} }
} }
}, },
@ -11373,6 +11376,7 @@ fn analyzeArithmetic(
return sema.failWithDivideByZero(block, rhs_src); return sema.failWithDivideByZero(block, rhs_src);
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_trunc_optimized else .div_trunc;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) { if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) {
@ -11398,8 +11402,8 @@ fn analyzeArithmetic(
try lhs_val.floatDivTrunc(rhs_val, resolved_type, sema.arena, target), try lhs_val.floatDivTrunc(rhs_val, resolved_type, sema.arena, target),
); );
} }
} else break :rs .{ .src = rhs_src, .air_tag = .div_trunc }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .div_trunc }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.div_floor => { .div_floor => {
// For integers: // For integers:
@ -11436,6 +11440,7 @@ fn analyzeArithmetic(
return sema.failWithDivideByZero(block, rhs_src); return sema.failWithDivideByZero(block, rhs_src);
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_floor_optimized else .div_floor;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) { if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) {
@ -11461,8 +11466,8 @@ fn analyzeArithmetic(
try lhs_val.floatDivFloor(rhs_val, resolved_type, sema.arena, target), try lhs_val.floatDivFloor(rhs_val, resolved_type, sema.arena, target),
); );
} }
} else break :rs .{ .src = rhs_src, .air_tag = .div_floor }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .div_floor }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.div_exact => { .div_exact => {
// For integers: // For integers:
@ -11498,6 +11503,7 @@ fn analyzeArithmetic(
return sema.failWithDivideByZero(block, rhs_src); return sema.failWithDivideByZero(block, rhs_src);
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_exact_optimized else .div_exact;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (maybe_rhs_val) |rhs_val| { if (maybe_rhs_val) |rhs_val| {
if (is_int) { if (is_int) {
@ -11513,8 +11519,8 @@ fn analyzeArithmetic(
try lhs_val.floatDiv(rhs_val, resolved_type, sema.arena, target), try lhs_val.floatDiv(rhs_val, resolved_type, sema.arena, target),
); );
} }
} else break :rs .{ .src = rhs_src, .air_tag = .div_exact }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .div_exact }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.mul => { .mul => {
// For integers: // For integers:
@ -11535,6 +11541,7 @@ fn analyzeArithmetic(
} }
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mul_optimized else .mul;
if (maybe_rhs_val) |rhs_val| { if (maybe_rhs_val) |rhs_val| {
if (rhs_val.isUndef()) { if (rhs_val.isUndef()) {
if (is_int) { if (is_int) {
@ -11570,8 +11577,8 @@ fn analyzeArithmetic(
try lhs_val.floatMul(rhs_val, resolved_type, sema.arena, target), try lhs_val.floatMul(rhs_val, resolved_type, sema.arena, target),
); );
} }
} else break :rs .{ .src = lhs_src, .air_tag = .mul }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
} else break :rs .{ .src = rhs_src, .air_tag = .mul }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
}, },
.mulwrap => { .mulwrap => {
// Integers only; floats are handled above. // Integers only; floats are handled above.
@ -11588,6 +11595,7 @@ fn analyzeArithmetic(
} }
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mulwrap_optimized else .mulwrap;
if (maybe_rhs_val) |rhs_val| { if (maybe_rhs_val) |rhs_val| {
if (rhs_val.isUndef()) { if (rhs_val.isUndef()) {
return sema.addConstUndef(resolved_type); return sema.addConstUndef(resolved_type);
@ -11606,8 +11614,8 @@ fn analyzeArithmetic(
resolved_type, resolved_type,
try lhs_val.numberMulWrap(rhs_val, resolved_type, sema.arena, target), try lhs_val.numberMulWrap(rhs_val, resolved_type, sema.arena, target),
); );
} else break :rs .{ .src = lhs_src, .air_tag = .mulwrap }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
} else break :rs .{ .src = rhs_src, .air_tag = .mulwrap }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
}, },
.mul_sat => { .mul_sat => {
// Integers only; floats are checked above. // Integers only; floats are checked above.
@ -11777,6 +11785,7 @@ fn analyzeArithmetic(
return sema.failWithDivideByZero(block, rhs_src); return sema.failWithDivideByZero(block, rhs_src);
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .rem_optimized else .rem;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
return sema.addConstUndef(resolved_type); return sema.addConstUndef(resolved_type);
@ -11786,8 +11795,8 @@ fn analyzeArithmetic(
resolved_type, resolved_type,
try lhs_val.floatRem(rhs_val, resolved_type, sema.arena, target), try lhs_val.floatRem(rhs_val, resolved_type, sema.arena, target),
); );
} else break :rs .{ .src = rhs_src, .air_tag = .rem }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .rem }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
.mod => { .mod => {
// For integers: // For integers:
@ -11834,6 +11843,7 @@ fn analyzeArithmetic(
return sema.failWithDivideByZero(block, rhs_src); return sema.failWithDivideByZero(block, rhs_src);
} }
} }
const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mod_optimized else .mod;
if (maybe_lhs_val) |lhs_val| { if (maybe_lhs_val) |lhs_val| {
if (lhs_val.isUndef()) { if (lhs_val.isUndef()) {
return sema.addConstUndef(resolved_type); return sema.addConstUndef(resolved_type);
@ -11843,8 +11853,8 @@ fn analyzeArithmetic(
resolved_type, resolved_type,
try lhs_val.floatMod(rhs_val, resolved_type, sema.arena, target), try lhs_val.floatMod(rhs_val, resolved_type, sema.arena, target),
); );
} else break :rs .{ .src = rhs_src, .air_tag = .mod }; } else break :rs .{ .src = rhs_src, .air_tag = air_tag };
} else break :rs .{ .src = lhs_src, .air_tag = .mod }; } else break :rs .{ .src = lhs_src, .air_tag = air_tag };
}, },
else => unreachable, else => unreachable,
} }
@ -11874,7 +11884,7 @@ fn analyzeArithmetic(
const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty);
const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector) const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector)
try block.addInst(.{ try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = ov_bit, .operand = ov_bit,
.operation = .Or, .operation = .Or,
@ -11890,13 +11900,17 @@ fn analyzeArithmetic(
} }
} }
switch (rs.air_tag) { switch (rs.air_tag) {
.div_float, .div_exact, .div_trunc, .div_floor => { // zig fmt: off
.div_float, .div_exact, .div_trunc, .div_floor, .div_float_optimized,
.div_exact_optimized, .div_trunc_optimized, .div_floor_optimized
// zig fmt: on
=> if (scalar_tag == .Int or block.float_mode == .Optimized) {
const ok = if (resolved_type.zigTypeTag() == .Vector) ok: { const ok = if (resolved_type.zigTypeTag() == .Vector) ok: {
const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero); const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero);
const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val); const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val);
const ok = try block.addCmpVector(casted_rhs, zero, .neq, try sema.addType(resolved_type)); const ok = try block.addCmpVector(casted_rhs, zero, .neq, try sema.addType(resolved_type));
break :ok try block.addInst(.{ break :ok try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = ok, .operand = ok,
.operation = .And, .operation = .And,
@ -11904,17 +11918,17 @@ fn analyzeArithmetic(
}); });
} else ok: { } else ok: {
const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero); const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero);
break :ok try block.addBinOp(.cmp_neq, casted_rhs, zero); break :ok try block.addBinOp(if (block.float_mode == .Optimized) .cmp_neq_optimized else .cmp_neq, casted_rhs, zero);
}; };
try sema.addSafetyCheck(block, ok, .divide_by_zero); try sema.addSafetyCheck(block, ok, .divide_by_zero);
}, },
.rem, .mod => { .rem, .mod, .rem_optimized, .mod_optimized => {
const ok = if (resolved_type.zigTypeTag() == .Vector) ok: { const ok = if (resolved_type.zigTypeTag() == .Vector) ok: {
const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero); const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero);
const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val); const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val);
const ok = try block.addCmpVector(casted_rhs, zero, if (scalar_tag == .Int) .gt else .neq, try sema.addType(resolved_type)); const ok = try block.addCmpVector(casted_rhs, zero, if (scalar_tag == .Int) .gt else .neq, try sema.addType(resolved_type));
break :ok try block.addInst(.{ break :ok try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = ok, .operand = ok,
.operation = .And, .operation = .And,
@ -11922,13 +11936,19 @@ fn analyzeArithmetic(
}); });
} else ok: { } else ok: {
const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero); const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero);
break :ok try block.addBinOp(if (scalar_tag == .Int) .cmp_gt else .cmp_neq, casted_rhs, zero); const air_tag = if (scalar_tag == .Int)
Air.Inst.Tag.cmp_gt
else if (block.float_mode == .Optimized)
Air.Inst.Tag.cmp_neq_optimized
else
Air.Inst.Tag.cmp_neq;
break :ok try block.addBinOp(air_tag, casted_rhs, zero);
}; };
try sema.addSafetyCheck(block, ok, .remainder_division_zero_negative); try sema.addSafetyCheck(block, ok, .remainder_division_zero_negative);
}, },
else => {}, else => {},
} }
if (rs.air_tag == .div_exact) { if (rs.air_tag == .div_exact or rs.air_tag == .div_exact_optimized) {
const result = try block.addBinOp(.div_exact, casted_lhs, casted_rhs); const result = try block.addBinOp(.div_exact, casted_lhs, casted_rhs);
const ok = if (scalar_tag == .Float) ok: { const ok = if (scalar_tag == .Float) ok: {
const floored = try block.addUnOp(.floor, result); const floored = try block.addUnOp(.floor, result);
@ -11936,14 +11956,14 @@ fn analyzeArithmetic(
if (resolved_type.zigTypeTag() == .Vector) { if (resolved_type.zigTypeTag() == .Vector) {
const eql = try block.addCmpVector(result, floored, .eq, try sema.addType(resolved_type)); const eql = try block.addCmpVector(result, floored, .eq, try sema.addType(resolved_type));
break :ok try block.addInst(.{ break :ok try block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = eql, .operand = eql,
.operation = .And, .operation = .And,
} }, } },
}); });
} else { } else {
const is_in_range = try block.addBinOp(.cmp_eq, result, floored); const is_in_range = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, result, floored);
break :ok is_in_range; break :ok is_in_range;
} }
} else ok: { } else ok: {
@ -11962,7 +11982,7 @@ fn analyzeArithmetic(
}); });
} else { } else {
const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero); const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero);
const is_in_range = try block.addBinOp(.cmp_eq, remainder, zero); const is_in_range = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, remainder, zero);
break :ok is_in_range; break :ok is_in_range;
} }
}; };
@ -12476,7 +12496,7 @@ fn cmpSelf(
const result_ty_ref = try sema.addType(result_ty); const result_ty_ref = try sema.addType(result_ty);
return block.addCmpVector(casted_lhs, casted_rhs, op, result_ty_ref); return block.addCmpVector(casted_lhs, casted_rhs, op, result_ty_ref);
} }
const tag = Air.Inst.Tag.fromCmpOp(op); const tag = Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized);
return block.addBinOp(tag, casted_lhs, casted_rhs); return block.addBinOp(tag, casted_lhs, casted_rhs);
} }
@ -15954,12 +15974,12 @@ fn zirFloatToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!
} }
try sema.requireRuntimeBlock(block, inst_data.src(), operand_src); try sema.requireRuntimeBlock(block, inst_data.src(), operand_src);
const result = try block.addTyOp(.float_to_int, dest_ty, operand); const result = try block.addTyOp(if (block.float_mode == .Optimized) .float_to_int_optimized else .float_to_int, dest_ty, operand);
if (block.wantSafety()) { if (block.wantSafety()) {
const back = try block.addTyOp(.int_to_float, operand_ty, result); const back = try block.addTyOp(.int_to_float, operand_ty, result);
const diff = try block.addBinOp(.sub, operand, back); const diff = try block.addBinOp(.sub, operand, back);
const ok_pos = try block.addBinOp(.cmp_lt, diff, try sema.addConstant(operand_ty, Value.one)); const ok_pos = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_lt_optimized else .cmp_lt, diff, try sema.addConstant(operand_ty, Value.one));
const ok_neg = try block.addBinOp(.cmp_gt, diff, try sema.addConstant(operand_ty, Value.negative_one)); const ok_neg = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_gt_optimized else .cmp_gt, diff, try sema.addConstant(operand_ty, Value.negative_one));
const ok = try block.addBinOp(.bool_and, ok_pos, ok_neg); const ok = try block.addBinOp(.bool_and, ok_pos, ok_neg);
try sema.addSafetyCheck(block, ok, .integer_part_out_of_bounds); try sema.addSafetyCheck(block, ok, .integer_part_out_of_bounds);
} }
@ -17194,7 +17214,7 @@ fn zirReduce(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
try sema.requireRuntimeBlock(block, inst_data.src(), operand_src); try sema.requireRuntimeBlock(block, inst_data.src(), operand_src);
return block.addInst(.{ return block.addInst(.{
.tag = .reduce, .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce,
.data = .{ .reduce = .{ .data = .{ .reduce = .{
.operand = operand, .operand = operand,
.operation = operation, .operation = operation,
@ -24489,7 +24509,7 @@ fn cmpNumeric(
}; };
const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src); const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src);
const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src); const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src);
return block.addBinOp(Air.Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs); return block.addBinOp(Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized), casted_lhs, casted_rhs);
} }
// For mixed unsigned integer sizes, implicit cast both operands to the larger integer. // For mixed unsigned integer sizes, implicit cast both operands to the larger integer.
// For mixed signed and unsigned integers, implicit cast both operands to a signed // For mixed signed and unsigned integers, implicit cast both operands to a signed
@ -24610,7 +24630,7 @@ fn cmpNumeric(
const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src); const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src);
const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src); const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src);
return block.addBinOp(Air.Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs); return block.addBinOp(Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized), casted_lhs, casted_rhs);
} }
/// Asserts that lhs and rhs types are both vectors. /// Asserts that lhs and rhs types are both vectors.

View file

@ -729,6 +729,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
.wrap_errunion_err => try self.airWrapErrUnionErr(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> return self.fail("TODO implement optimized float mode", .{}),
.wasm_memory_size => unreachable, .wasm_memory_size => unreachable,
.wasm_memory_grow => unreachable, .wasm_memory_grow => unreachable,
// zig fmt: on // zig fmt: on

View file

@ -744,6 +744,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
.wrap_errunion_err => try self.airWrapErrUnionErr(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> return self.fail("TODO implement optimized float mode", .{}),
.wasm_memory_size => unreachable, .wasm_memory_size => unreachable,
.wasm_memory_grow => unreachable, .wasm_memory_grow => unreachable,
// zig fmt: on // zig fmt: on

View file

@ -669,6 +669,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
.wrap_errunion_err => try self.airWrapErrUnionErr(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> return self.fail("TODO implement optimized float mode", .{}),
.wasm_memory_size => unreachable, .wasm_memory_size => unreachable,
.wasm_memory_grow => unreachable, .wasm_memory_grow => unreachable,
// zig fmt: on // zig fmt: on

View file

@ -681,6 +681,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.wrap_errunion_payload => @panic("TODO try self.airWrapErrUnionPayload(inst)"), .wrap_errunion_payload => @panic("TODO try self.airWrapErrUnionPayload(inst)"),
.wrap_errunion_err => try self.airWrapErrUnionErr(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> @panic("TODO implement optimized float mode"),
.wasm_memory_size => unreachable, .wasm_memory_size => unreachable,
.wasm_memory_grow => unreachable, .wasm_memory_grow => unreachable,
// zig fmt: on // zig fmt: on

View file

@ -1622,6 +1622,30 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
.err_return_trace, .err_return_trace,
.set_err_return_trace, .set_err_return_trace,
=> |tag| return self.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}), => |tag| return self.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> return self.fail("TODO implement optimized float mode", .{}),
}; };
} }

View file

@ -751,6 +751,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst),
.wrap_errunion_err => try self.airWrapErrUnionErr(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> return self.fail("TODO implement optimized float mode", .{}),
.wasm_memory_size => unreachable, .wasm_memory_size => unreachable,
.wasm_memory_grow => unreachable, .wasm_memory_grow => unreachable,
// zig fmt: on // zig fmt: on

View file

@ -1928,6 +1928,30 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
.wasm_memory_size => try airWasmMemorySize(f, inst), .wasm_memory_size => try airWasmMemorySize(f, inst),
.wasm_memory_grow => try airWasmMemoryGrow(f, inst), .wasm_memory_grow => try airWasmMemoryGrow(f, inst),
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.neg_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
.cmp_vector_optimized,
.reduce_optimized,
.float_to_int_optimized,
=> return f.fail("TODO implement optimized float mode", .{}),
// zig fmt: on // zig fmt: on
}; };
switch (result_value) { switch (result_value) {

View file

@ -3984,21 +3984,21 @@ pub const FuncGen = struct {
for (body) |inst, i| { for (body) |inst, i| {
const opt_value: ?*const llvm.Value = switch (air_tags[inst]) { const opt_value: ?*const llvm.Value = switch (air_tags[inst]) {
// zig fmt: off // zig fmt: off
.add => try self.airAdd(inst), .add => try self.airAdd(inst, false),
.addwrap => try self.airAddWrap(inst), .addwrap => try self.airAddWrap(inst, false),
.add_sat => try self.airAddSat(inst), .add_sat => try self.airAddSat(inst),
.sub => try self.airSub(inst), .sub => try self.airSub(inst, false),
.subwrap => try self.airSubWrap(inst), .subwrap => try self.airSubWrap(inst, false),
.sub_sat => try self.airSubSat(inst), .sub_sat => try self.airSubSat(inst),
.mul => try self.airMul(inst), .mul => try self.airMul(inst, false),
.mulwrap => try self.airMulWrap(inst), .mulwrap => try self.airMulWrap(inst, false),
.mul_sat => try self.airMulSat(inst), .mul_sat => try self.airMulSat(inst),
.div_float => try self.airDivFloat(inst), .div_float => try self.airDivFloat(inst, false),
.div_trunc => try self.airDivTrunc(inst), .div_trunc => try self.airDivTrunc(inst, false),
.div_floor => try self.airDivFloor(inst), .div_floor => try self.airDivFloor(inst, false),
.div_exact => try self.airDivExact(inst), .div_exact => try self.airDivExact(inst, false),
.rem => try self.airRem(inst), .rem => try self.airRem(inst, false),
.mod => try self.airMod(inst), .mod => try self.airMod(inst, false),
.ptr_add => try self.airPtrAdd(inst), .ptr_add => try self.airPtrAdd(inst),
.ptr_sub => try self.airPtrSub(inst), .ptr_sub => try self.airPtrSub(inst),
.shl => try self.airShl(inst), .shl => try self.airShl(inst),
@ -4009,6 +4009,19 @@ pub const FuncGen = struct {
.slice => try self.airSlice(inst), .slice => try self.airSlice(inst),
.mul_add => try self.airMulAdd(inst), .mul_add => try self.airMulAdd(inst),
.add_optimized => try self.airAdd(inst, true),
.addwrap_optimized => try self.airAddWrap(inst, true),
.sub_optimized => try self.airSub(inst, true),
.subwrap_optimized => try self.airSubWrap(inst, true),
.mul_optimized => try self.airMul(inst, true),
.mulwrap_optimized => try self.airMulWrap(inst, true),
.div_float_optimized => try self.airDivFloat(inst, true),
.div_trunc_optimized => try self.airDivTrunc(inst, true),
.div_floor_optimized => try self.airDivFloor(inst, true),
.div_exact_optimized => try self.airDivExact(inst, true),
.rem_optimized => try self.airRem(inst, true),
.mod_optimized => try self.airMod(inst, true),
.add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"), .add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"),
.sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"), .sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"),
.mul_with_overflow => try self.airOverflow(inst, "llvm.smul.with.overflow", "llvm.umul.with.overflow"), .mul_with_overflow => try self.airOverflow(inst, "llvm.smul.with.overflow", "llvm.umul.with.overflow"),
@ -4034,16 +4047,26 @@ pub const FuncGen = struct {
.ceil => try self.airUnaryOp(inst, .ceil), .ceil => try self.airUnaryOp(inst, .ceil),
.round => try self.airUnaryOp(inst, .round), .round => try self.airUnaryOp(inst, .round),
.trunc_float => try self.airUnaryOp(inst, .trunc), .trunc_float => try self.airUnaryOp(inst, .trunc),
.neg => try self.airUnaryOp(inst, .neg),
.cmp_eq => try self.airCmp(inst, .eq), .neg => try self.airNeg(inst, false),
.cmp_gt => try self.airCmp(inst, .gt), .neg_optimized => try self.airNeg(inst, true),
.cmp_gte => try self.airCmp(inst, .gte),
.cmp_lt => try self.airCmp(inst, .lt),
.cmp_lte => try self.airCmp(inst, .lte),
.cmp_neq => try self.airCmp(inst, .neq),
.cmp_vector => try self.airCmpVector(inst), .cmp_eq => try self.airCmp(inst, .eq, false),
.cmp_gt => try self.airCmp(inst, .gt, false),
.cmp_gte => try self.airCmp(inst, .gte, false),
.cmp_lt => try self.airCmp(inst, .lt, false),
.cmp_lte => try self.airCmp(inst, .lte, false),
.cmp_neq => try self.airCmp(inst, .neq, false),
.cmp_eq_optimized => try self.airCmp(inst, .eq, true),
.cmp_gt_optimized => try self.airCmp(inst, .gt, true),
.cmp_gte_optimized => try self.airCmp(inst, .gte, true),
.cmp_lt_optimized => try self.airCmp(inst, .lt, true),
.cmp_lte_optimized => try self.airCmp(inst, .lte, true),
.cmp_neq_optimized => try self.airCmp(inst, .neq, true),
.cmp_vector => try self.airCmpVector(inst, false),
.cmp_vector_optimized => try self.airCmpVector(inst, true),
.cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst), .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst),
.is_non_null => try self.airIsNonNull(inst, false, .NE), .is_non_null => try self.airIsNonNull(inst, false, .NE),
@ -4093,8 +4116,10 @@ pub const FuncGen = struct {
.ptr_slice_ptr_ptr => try self.airPtrSliceFieldPtr(inst, 0), .ptr_slice_ptr_ptr => try self.airPtrSliceFieldPtr(inst, 0),
.ptr_slice_len_ptr => try self.airPtrSliceFieldPtr(inst, 1), .ptr_slice_len_ptr => try self.airPtrSliceFieldPtr(inst, 1),
.float_to_int => try self.airFloatToInt(inst, false),
.float_to_int_optimized => try self.airFloatToInt(inst, true),
.array_to_slice => try self.airArrayToSlice(inst), .array_to_slice => try self.airArrayToSlice(inst),
.float_to_int => try self.airFloatToInt(inst),
.int_to_float => try self.airIntToFloat(inst), .int_to_float => try self.airIntToFloat(inst),
.cmpxchg_weak => try self.airCmpxchg(inst, true), .cmpxchg_weak => try self.airCmpxchg(inst, true),
.cmpxchg_strong => try self.airCmpxchg(inst, false), .cmpxchg_strong => try self.airCmpxchg(inst, false),
@ -4115,11 +4140,13 @@ pub const FuncGen = struct {
.splat => try self.airSplat(inst), .splat => try self.airSplat(inst),
.select => try self.airSelect(inst), .select => try self.airSelect(inst),
.shuffle => try self.airShuffle(inst), .shuffle => try self.airShuffle(inst),
.reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst), .aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst), .union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst), .prefetch => try self.airPrefetch(inst),
.reduce => try self.airReduce(inst, false),
.reduce_optimized => try self.airReduce(inst, true),
.atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
.atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
.atomic_store_release => try self.airAtomicStore(inst, .Release), .atomic_store_release => try self.airAtomicStore(inst, .Release),
@ -4485,8 +4512,9 @@ pub const FuncGen = struct {
return null; return null;
} }
fn airCmp(self: *FuncGen, inst: Air.Inst.Index, op: math.CompareOperator) !?*const llvm.Value { fn airCmp(self: *FuncGen, inst: Air.Inst.Index, op: math.CompareOperator, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -4496,8 +4524,9 @@ pub const FuncGen = struct {
return self.cmp(lhs, rhs, operand_ty, op); return self.cmp(lhs, rhs, operand_ty, op);
} }
fn airCmpVector(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airCmpVector(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data; const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
@ -4943,10 +4972,12 @@ pub const FuncGen = struct {
return self.builder.buildCall(libc_fn, &params, params.len, .C, .Auto, ""); return self.builder.buildCall(libc_fn, &params, params.len, .C, .Auto, "");
} }
fn airFloatToInt(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airFloatToInt(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) if (self.liveness.isUnused(inst))
return null; return null;
self.builder.setFastMath(want_fast_math);
const target = self.dg.module.getTarget(); const target = self.dg.module.getTarget();
const ty_op = self.air.instructions.items(.data)[inst].ty_op; const ty_op = self.air.instructions.items(.data)[inst].ty_op;
@ -6095,8 +6126,9 @@ pub const FuncGen = struct {
return self.builder.buildInsertValue(partial, len, 1, ""); return self.builder.buildInsertValue(partial, len, 1, "");
} }
fn airAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airAdd(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6109,8 +6141,9 @@ pub const FuncGen = struct {
return self.builder.buildNUWAdd(lhs, rhs, ""); return self.builder.buildNUWAdd(lhs, rhs, "");
} }
fn airAddWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airAddWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6134,8 +6167,9 @@ pub const FuncGen = struct {
return self.builder.buildUAddSat(lhs, rhs, ""); return self.builder.buildUAddSat(lhs, rhs, "");
} }
fn airSub(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airSub(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6148,8 +6182,9 @@ pub const FuncGen = struct {
return self.builder.buildNUWSub(lhs, rhs, ""); return self.builder.buildNUWSub(lhs, rhs, "");
} }
fn airSubWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airSubWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6172,8 +6207,9 @@ pub const FuncGen = struct {
return self.builder.buildUSubSat(lhs, rhs, ""); return self.builder.buildUSubSat(lhs, rhs, "");
} }
fn airMul(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airMul(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6186,8 +6222,9 @@ pub const FuncGen = struct {
return self.builder.buildNUWMul(lhs, rhs, ""); return self.builder.buildNUWMul(lhs, rhs, "");
} }
fn airMulWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airMulWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6210,8 +6247,9 @@ pub const FuncGen = struct {
return self.builder.buildUMulFixSat(lhs, rhs, ""); return self.builder.buildUMulFixSat(lhs, rhs, "");
} }
fn airDivFloat(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airDivFloat(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6221,8 +6259,9 @@ pub const FuncGen = struct {
return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs });
} }
fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6238,8 +6277,9 @@ pub const FuncGen = struct {
return self.builder.buildUDiv(lhs, rhs, ""); return self.builder.buildUDiv(lhs, rhs, "");
} }
fn airDivFloor(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airDivFloor(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6270,8 +6310,9 @@ pub const FuncGen = struct {
return self.builder.buildUDiv(lhs, rhs, ""); return self.builder.buildUDiv(lhs, rhs, "");
} }
fn airDivExact(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airDivExact(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6284,8 +6325,9 @@ pub const FuncGen = struct {
return self.builder.buildExactUDiv(lhs, rhs, ""); return self.builder.buildExactUDiv(lhs, rhs, "");
} }
fn airRem(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airRem(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -6298,8 +6340,9 @@ pub const FuncGen = struct {
return self.builder.buildURem(lhs, rhs, ""); return self.builder.buildURem(lhs, rhs, "");
} }
fn airMod(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airMod(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const bin_op = self.air.instructions.items(.data)[inst].bin_op; const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const lhs = try self.resolveInst(bin_op.lhs); const lhs = try self.resolveInst(bin_op.lhs);
@ -7613,6 +7656,17 @@ pub const FuncGen = struct {
return self.buildFloatOp(op, operand_ty, 1, .{operand}); return self.buildFloatOp(op, operand_ty, 1, .{operand});
} }
/// Lowers a floating-point negation AIR instruction to LLVM IR.
/// `want_fast_math` toggles the builder's fast-math state before emitting the
/// negation — presumably true for the `neg_optimized` AIR variant introduced
/// alongside @setFloatMode; confirm at the call site in the AIR dispatch switch.
fn airNeg(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
// Result is unused: emit nothing.
if (self.liveness.isUnused(inst)) return null;
// Set (or clear) fast-math flags on the LLVM builder for this instruction.
self.builder.setFastMath(want_fast_math);
const un_op = self.air.instructions.items(.data)[inst].un_op;
const operand = try self.resolveInst(un_op);
const operand_ty = self.air.typeOf(un_op);
// Delegates to the shared float-op helper with the `.neg` unary operation.
return self.buildFloatOp(.neg, operand_ty, 1, .{operand});
}
fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value { fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
@ -7927,8 +7981,9 @@ pub const FuncGen = struct {
return self.builder.buildShuffleVector(a, b, llvm_mask_value, ""); return self.builder.buildShuffleVector(a, b, llvm_mask_value, "");
} }
fn airReduce(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { fn airReduce(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value {
if (self.liveness.isUnused(inst)) return null; if (self.liveness.isUnused(inst)) return null;
self.builder.setFastMath(want_fast_math);
const reduce = self.air.instructions.items(.data)[inst].reduce; const reduce = self.air.instructions.items(.data)[inst].reduce;
const operand = try self.resolveInst(reduce.operand); const operand = try self.resolveInst(reduce.operand);

View file

@ -941,6 +941,9 @@ pub const Builder = opaque {
pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce; pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce;
extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value; extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value;
pub const setFastMath = ZigLLVMSetFastMath;
extern fn ZigLLVMSetFastMath(B: *const Builder, on_state: bool) void;
}; };
pub const MDString = opaque { pub const MDString = opaque {

View file

@ -138,6 +138,24 @@ const Writer = struct {
.set_union_tag, .set_union_tag,
.min, .min,
.max, .max,
.add_optimized,
.addwrap_optimized,
.sub_optimized,
.subwrap_optimized,
.mul_optimized,
.mulwrap_optimized,
.div_float_optimized,
.div_trunc_optimized,
.div_floor_optimized,
.div_exact_optimized,
.rem_optimized,
.mod_optimized,
.cmp_lt_optimized,
.cmp_lte_optimized,
.cmp_eq_optimized,
.cmp_gte_optimized,
.cmp_gt_optimized,
.cmp_neq_optimized,
=> try w.writeBinOp(s, inst), => try w.writeBinOp(s, inst),
.is_null, .is_null,
@ -169,6 +187,7 @@ const Writer = struct {
.round, .round,
.trunc_float, .trunc_float,
.neg, .neg,
.neg_optimized,
.cmp_lt_errors_len, .cmp_lt_errors_len,
.set_err_return_trace, .set_err_return_trace,
=> try w.writeUnOp(s, inst), => try w.writeUnOp(s, inst),
@ -216,6 +235,7 @@ const Writer = struct {
.int_to_float, .int_to_float,
.splat, .splat,
.float_to_int, .float_to_int,
.float_to_int_optimized,
.get_union_tag, .get_union_tag,
.clz, .clz,
.ctz, .ctz,
@ -280,8 +300,8 @@ const Writer = struct {
.mul_add => try w.writeMulAdd(s, inst), .mul_add => try w.writeMulAdd(s, inst),
.select => try w.writeSelect(s, inst), .select => try w.writeSelect(s, inst),
.shuffle => try w.writeShuffle(s, inst), .shuffle => try w.writeShuffle(s, inst),
.reduce => try w.writeReduce(s, inst), .reduce, .reduce_optimized => try w.writeReduce(s, inst),
.cmp_vector => try w.writeCmpVector(s, inst), .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
.dbg_block_begin, .dbg_block_end => {}, .dbg_block_begin, .dbg_block_end => {},
} }