This commit is contained in:
Daniel Kongsgaard 2025-11-22 21:32:54 -05:00 committed by GitHub
commit f66ea7f383
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 288 additions and 193 deletions

View file

@ -540,10 +540,7 @@ pub fn parseIntSizeSuffix(buf: []const u8, digit_base: u8) ParseIntError!usize {
} else if (without_i.len != without_B.len) {
return error.InvalidCharacter;
}
const multiplier = math.powi(usize, magnitude_base, orders_of_magnitude) catch |err| switch (err) {
error.Underflow => unreachable,
error.Overflow => return error.Overflow,
};
const multiplier = try math.powi(usize, magnitude_base, orders_of_magnitude);
const number = try std.fmt.parseInt(usize, without_suffix, digit_base);
return math.mul(usize, number, multiplier);
}

View file

@ -81,8 +81,10 @@ pub fn approxEqAbs(comptime T: type, x: T, y: T, tolerance: T) bool {
if (x == y)
return true;
if (isNan(x) or isNan(y))
if (isNan(x) or isNan(y)) {
if (T == comptime_float) unreachable;
return false;
}
return @abs(x - y) <= tolerance;
}
@ -109,8 +111,10 @@ pub fn approxEqRel(comptime T: type, x: T, y: T, tolerance: T) bool {
if (x == y)
return true;
if (isNan(x) or isNan(y))
if (isNan(x) or isNan(y)) {
if (T == comptime_float) unreachable;
return false;
}
return @abs(x - y) <= @max(@abs(x), @abs(y)) * tolerance;
}

View file

@ -484,6 +484,7 @@ fn toFloat(comptime Float: type) !void {
);
}
test toFloat {
@setEvalBranchQuota(1_100);
if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/24191
try toFloat(f16);
try toFloat(f32);

View file

@ -117,21 +117,28 @@ pub fn FloatRepr(comptime Float: type) type {
/// Returns the raw mantissa bits that encode "1.0" for floating point type T.
/// Only the 80-bit float yields a non-zero value (a single bit at position
/// `floatFractionalBits(T)`); every other float type, and `comptime_float`,
/// yields 0. Used to dedupe f80 logic.
inline fn mantissaOne(comptime T: type) comptime_int {
    // `and` short-circuits, so `.float` is never accessed for comptime_float.
    const has_explicit_integer_bit = T != comptime_float and @typeInfo(T).float.bits == 80;
    return if (has_explicit_integer_bit) 1 << floatFractionalBits(T) else 0;
}
/// Creates floating point type T from an unbiased exponent and raw mantissa.
/// The exponent is biased by `floatExponentMax(T)`, shifted left past the
/// mantissa bits and OR-ed with `mantissa`; the resulting unsigned integer is
/// then bit-cast to the float. For `comptime_float` the bits are assembled in
/// a 128-bit integer and cast through f128.
inline fn reconstructFloat(comptime T: type, comptime exponent: comptime_int, comptime mantissa: comptime_int) T {
const TBits = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(T) } });
const biased_exponent = @as(TBits, exponent + floatExponentMax(T));
return @as(T, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(TBits, mantissa)));
// UBits: unsigned integer as wide as the float; FBits: the float type the bits are cast to.
const UBits, const FBits = switch (@typeInfo(T)) {
.float => |float| .{ std.meta.Int(.unsigned, float.bits), T },
.comptime_float => .{ std.meta.Int(.unsigned, 128), f128 },
else => unreachable,
};
const biased_exponent = @as(UBits, exponent + floatExponentMax(T));
return @as(T, @as(FBits, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(UBits, mantissa))));
}
/// Returns the number of bits in the exponent of floating point type T.
pub inline fn floatExponentBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .float);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);
return switch (@typeInfo(T).float.bits) {
if (info == .comptime_float) return 15;
return switch (info.float.bits) {
16 => 5,
32 => 8,
64 => 11,
@ -143,9 +150,11 @@ pub inline fn floatExponentBits(comptime T: type) comptime_int {
/// Returns the number of bits in the mantissa of floating point type T.
pub inline fn floatMantissaBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .float);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);
return switch (@typeInfo(T).float.bits) {
if (info == .comptime_float) return 112;
return switch (info.float.bits) {
16 => 10,
32 => 23,
64 => 52,
@ -157,12 +166,14 @@ pub inline fn floatMantissaBits(comptime T: type) comptime_int {
/// Returns the number of fractional bits in the mantissa of floating point type T.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
comptime assert(@typeInfo(T) == .float);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);
// standard IEEE floats have an implicit 0.m or 1.m integer part
// f80 is special and has an explicitly stored bit in the MSB
// this function corresponds to `MANT_DIG - 1' from C
return switch (@typeInfo(T).float.bits) {
if (info == .comptime_float) return 112;
return switch (info.float.bits) {
16 => 10,
32 => 23,
64 => 52,
@ -208,36 +219,37 @@ pub inline fn floatEps(comptime T: type) T {
/// Returns the local epsilon of floating point type T.
/// This is the absolute difference between `x` and the value obtained by
/// flipping the lowest bit of its bit representation.
/// `comptime_float` is evaluated through the f128 representation.
/// Any non-float type is a compile error.
pub inline fn floatEpsAt(comptime T: type, x: T) T {
switch (@typeInfo(T)) {
.float => |F| {
const U: type = @Type(.{ .int = .{ .signedness = .unsigned, .bits = F.bits } });
.float => |float| {
const U = std.meta.Int(.unsigned, float.bits);
const u: U = @bitCast(x);
// Toggle the least significant bit to get the neighbouring representation.
const y: T = @bitCast(u ^ 1);
return @abs(x - y);
},
.comptime_float => {
const u: u128 = @bitCast(@as(f128, x));
const y: f128 = @bitCast(u ^ 1);
return @as(comptime_float, @abs(x - y));
},
else => @compileError("floatEpsAt only supports floats"),
}
}
/// Returns the inf value for a floating point `Type`.
/// The value is built with an exponent one past `floatExponentMax(Type)` and
/// only the `mantissaOne(Type)` bits set in the mantissa.
/// `comptime_float` and non-float types are rejected with a compile error.
pub inline fn inf(comptime Type: type) Type {
const RuntimeType = switch (Type) {
else => Type,
comptime_float => f128, // any float type will do
return switch (@typeInfo(Type)) {
.float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type)),
.comptime_float => @compileError("comptime_float cannot be infinity"),
else => @compileError("unknown floating point type " ++ @typeName(Type)),
};
return reconstructFloat(RuntimeType, floatExponentMax(RuntimeType) + 1, mantissaOne(RuntimeType));
}
/// Returns the canonical quiet NaN representation for a floating point `Type`.
/// The value is built with an exponent one past `floatExponentMax(Type)` and
/// the topmost fractional mantissa bit set (in addition to `mantissaOne(Type)`).
/// `comptime_float` and non-float types are rejected with a compile error.
pub inline fn nan(comptime Type: type) Type {
const RuntimeType = switch (Type) {
else => Type,
comptime_float => f128, // any float type will do
return switch (@typeInfo(Type)) {
.float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type) | 1 << (floatFractionalBits(Type) - 1)),
.comptime_float => @compileError("comptime_float cannot be NaN"),
else => @compileError("unknown floating point type " ++ @typeName(Type)),
};
return reconstructFloat(
RuntimeType,
floatExponentMax(RuntimeType) + 1,
mantissaOne(RuntimeType) | 1 << (floatFractionalBits(RuntimeType) - 1),
);
}
/// Returns a signalling NaN representation for a floating point `Type`.
@ -245,21 +257,20 @@ pub inline fn nan(comptime Type: type) Type {
/// TODO: LLVM is known to miscompile on some architectures to quiet NaN -
/// this is tracked by https://github.com/ziglang/zig/issues/14366
///
/// The value is built with an exponent one past `floatExponentMax(Type)` and
/// the second-highest fractional mantissa bit set (in addition to
/// `mantissaOne(Type)`).
/// `comptime_float` and non-float types are rejected with a compile error.
pub inline fn snan(comptime Type: type) Type {
const RuntimeType = switch (Type) {
else => Type,
comptime_float => f128, // any float type will do
return switch (@typeInfo(Type)) {
.float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type) | 1 << (floatFractionalBits(Type) - 2)),
.comptime_float => @compileError("comptime_float cannot be NaN"),
else => @compileError("unknown floating point type " ++ @typeName(Type)),
};
return reconstructFloat(
RuntimeType,
floatExponentMax(RuntimeType) + 1,
mantissaOne(RuntimeType) | 1 << (floatFractionalBits(RuntimeType) - 2),
);
}
fn floatBits(comptime Type: type) !void {
// (1 +) for the sign bit, since it is separate from the other bits
const size = 1 + floatExponentBits(Type) + floatMantissaBits(Type);
try expect(@bitSizeOf(Type) == size);
if (@typeInfo(Type) == .float)
try expect(@bitSizeOf(Type) == size)
else
try expect(128 == size);
try expect(floatFractionalBits(Type) <= floatMantissaBits(Type));
// for machine epsilon, assert expmin <= -prec <= expmax
@ -273,6 +284,8 @@ test floatBits {
try floatBits(f80);
try floatBits(f128);
try floatBits(c_longdouble);
try floatBits(comptime_float);
try comptime floatBits(comptime_float);
}
test inf {
@ -281,11 +294,11 @@ test inf {
const inf_u64: u64 = 0x7FF0000000000000;
const inf_u80: u80 = 0x7FFF8000000000000000;
const inf_u128: u128 = 0x7FFF0000000000000000000000000000;
try expectEqual(inf_u16, @as(u16, @bitCast(inf(f16))));
try expectEqual(inf_u32, @as(u32, @bitCast(inf(f32))));
try expectEqual(inf_u64, @as(u64, @bitCast(inf(f64))));
try expectEqual(inf_u80, @as(u80, @bitCast(inf(f80))));
try expectEqual(inf_u128, @as(u128, @bitCast(inf(f128))));
try expect(inf_u16 == @as(u16, @bitCast(inf(f16))));
try expect(inf_u32 == @as(u32, @bitCast(inf(f32))));
try expect(inf_u64 == @as(u64, @bitCast(inf(f64))));
try expect(inf_u80 == @as(u80, @bitCast(inf(f80))));
try expect(inf_u128 == @as(u128, @bitCast(inf(f128))));
}
test nan {
@ -294,11 +307,11 @@ test nan {
const qnan_u64: u64 = 0x7FF8000000000000;
const qnan_u80: u80 = 0x7FFFC000000000000000;
const qnan_u128: u128 = 0x7FFF8000000000000000000000000000;
try expectEqual(qnan_u16, @as(u16, @bitCast(nan(f16))));
try expectEqual(qnan_u32, @as(u32, @bitCast(nan(f32))));
try expectEqual(qnan_u64, @as(u64, @bitCast(nan(f64))));
try expectEqual(qnan_u80, @as(u80, @bitCast(nan(f80))));
try expectEqual(qnan_u128, @as(u128, @bitCast(nan(f128))));
try expect(qnan_u16 == @as(u16, @bitCast(nan(f16))));
try expect(qnan_u32 == @as(u32, @bitCast(nan(f32))));
try expect(qnan_u64 == @as(u64, @bitCast(nan(f64))));
try expect(qnan_u80 == @as(u80, @bitCast(nan(f80))));
try expect(qnan_u128 == @as(u128, @bitCast(nan(f128))));
}
test snan {
@ -307,9 +320,9 @@ test snan {
const snan_u64: u64 = 0x7FF4000000000000;
const snan_u80: u80 = 0x7FFFA000000000000000;
const snan_u128: u128 = 0x7FFF4000000000000000000000000000;
try expectEqual(snan_u16, @as(u16, @bitCast(snan(f16))));
try expectEqual(snan_u32, @as(u32, @bitCast(snan(f32))));
try expectEqual(snan_u64, @as(u64, @bitCast(snan(f64))));
try expectEqual(snan_u80, @as(u80, @bitCast(snan(f80))));
try expectEqual(snan_u128, @as(u128, @bitCast(snan(f128))));
try expect(snan_u16 == @as(u16, @bitCast(snan(f16))));
try expect(snan_u32 == @as(u32, @bitCast(snan(f32))));
try expect(snan_u64 == @as(u64, @bitCast(snan(f64))));
try expect(snan_u80 == @as(u80, @bitCast(snan(f80))));
try expect(snan_u128 == @as(u128, @bitCast(snan(f128))));
}

View file

@ -1,5 +1,6 @@
const std = @import("../std.zig");
const math = std.math;
const assert = std.debug.assert;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
const expectApproxEqAbs = std.testing.expectApproxEqAbs;
@ -20,8 +21,10 @@ pub fn Frexp(comptime T: type) type {
/// - frexp(nan) = nan, undefined
pub fn frexp(x: anytype) Frexp(@TypeOf(x)) {
const T: type = @TypeOf(x);
const info = @typeInfo(T);
comptime assert(info == .float or info == .comptime_float);
const bits: comptime_int = @typeInfo(T).float.bits;
const bits: comptime_int = if (info == .float) info.float.bits else 128;
const Int: type = std.meta.Int(.unsigned, bits);
const exp_bits: comptime_int = math.floatExponentBits(T);
@ -43,7 +46,7 @@ pub fn frexp(x: anytype) Frexp(@TypeOf(x)) {
const extra_denorm_shift: comptime_int = 1 - ones_place;
var result: Frexp(T) = undefined;
var v: Int = @bitCast(x);
var v: Int = if (info == .float) @bitCast(x) else @bitCast(@as(f128, x));
const m: MantInt = @truncate(v);
const e: ExpInt = @truncate(v >> mant_bits);
@ -81,7 +84,7 @@ pub fn frexp(x: anytype) Frexp(@TypeOf(x)) {
},
}
result.significand = @bitCast(v);
result.significand = if (info == .float) @bitCast(v) else @as(f128, @bitCast(v));
return result;
}
@ -91,23 +94,22 @@ fn FrexpTests(comptime Float: type) type {
const T = Float;
test "normal" {
const epsilon = 1e-6;
var r: Frexp(T) = undefined;
r = frexp(@as(T, 1.3));
try expectApproxEqAbs(0.65, r.significand, epsilon);
try expectEqual(1, r.exponent);
const r1 = frexp(@as(T, 1.3));
try expectApproxEqAbs(0.65, r1.significand, epsilon);
try expectEqual(1, r1.exponent);
r = frexp(@as(T, 78.0234));
try expectApproxEqAbs(0.609558, r.significand, epsilon);
try expectEqual(7, r.exponent);
const r2 = frexp(@as(T, 78.0234));
try expectApproxEqAbs(0.609558, r2.significand, epsilon);
try expectEqual(7, r2.exponent);
r = frexp(@as(T, -1234.5678));
try expectEqual(11, r.exponent);
try expectApproxEqAbs(-0.602816, r.significand, epsilon);
const r3 = frexp(@as(T, -1234.5678));
try expectEqual(11, r3.exponent);
try expectApproxEqAbs(-0.602816, r3.significand, epsilon);
}
test "max" {
const exponent = math.floatExponentMax(T) + 1;
const significand = 1.0 - math.floatEps(T) / 2;
const significand = 1.0 - math.floatEps(T) / 2.0;
const r: Frexp(T) = frexp(math.floatMax(T));
try expectEqual(exponent, r.exponent);
try expectEqual(significand, r.significand);
@ -126,17 +128,16 @@ fn FrexpTests(comptime Float: type) type {
try expectEqual(0.5, r.significand);
}
test "zero" {
var r: Frexp(T) = undefined;
const r1 = frexp(@as(T, 0.0));
try expectEqual(0, r1.exponent);
try expect(math.isPositiveZero(r1.significand));
r = frexp(@as(T, 0.0));
try expectEqual(0, r.exponent);
try expect(math.isPositiveZero(r.significand));
r = frexp(@as(T, -0.0));
try expectEqual(0, r.exponent);
try expect(math.isNegativeZero(r.significand));
const r2 = frexp(@as(T, -0.0));
try expectEqual(0, r2.exponent);
try expect(math.isNegativeZero(r2.significand));
}
test "inf" {
if (T == comptime_float) return;
var r: Frexp(T) = undefined;
r = frexp(math.inf(T));
@ -148,6 +149,7 @@ fn FrexpTests(comptime Float: type) type {
try expect(math.isNegativeInf(r.significand));
}
test "nan" {
if (T == comptime_float) return;
const r: Frexp(T) = frexp(math.nan(T));
try expect(math.isNan(r.significand));
}
@ -156,53 +158,64 @@ fn FrexpTests(comptime Float: type) type {
// Generate tests for each floating point type
comptime {
for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
_ = FrexpTests(T);
}
}
test frexp {
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
@setEvalBranchQuota(1_500);
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
const max_exponent = math.floatExponentMax(T) + 1;
const min_exponent = math.floatExponentMin(T) + 1;
const truemin_exponent = min_exponent - math.floatFractionalBits(T);
var result: Frexp(T) = undefined;
comptime var x: T = undefined;
// basic usage
// value -> {significand, exponent},
// value == significand * (2 ^ exponent)
x = 1234.5678;
result = frexp(x);
try expectEqual(11, result.exponent);
try expectApproxEqAbs(0.602816, result.significand, 1e-6);
try expectEqual(x, math.ldexp(result.significand, result.exponent));
const x1 = 1234.5678;
const result1 = frexp(x1);
try expectEqual(11, result1.exponent);
try expectApproxEqAbs(0.602816, result1.significand, 1e-6);
try expectEqual(x1, math.ldexp(result1.significand, result1.exponent));
// float maximum
x = math.floatMax(T);
result = frexp(x);
try expectEqual(max_exponent, result.exponent);
try expectEqual(1.0 - math.floatEps(T) / 2, result.significand);
try expectEqual(x, math.ldexp(result.significand, result.exponent));
const x2 = math.floatMax(T);
const result2 = frexp(x2);
try expectEqual(max_exponent, result2.exponent);
try expectEqual(1.0 - math.floatEps(T) / 2.0, result2.significand);
try expectEqual(x2, math.ldexp(result2.significand, result2.exponent));
// float minimum
x = math.floatMin(T);
result = frexp(x);
try expectEqual(min_exponent, result.exponent);
try expectEqual(0.5, result.significand);
try expectEqual(x, math.ldexp(result.significand, result.exponent));
const x3 = math.floatMin(T);
const result3 = frexp(x3);
try expectEqual(min_exponent, result3.exponent);
try expectEqual(0.5, result3.significand);
try expectEqual(x3, math.ldexp(result3.significand, result3.exponent));
// float true minimum
// subnormal -> {normal, exponent}
x = math.floatTrueMin(T);
result = frexp(x);
try expectEqual(truemin_exponent, result.exponent);
try expectEqual(0.5, result.significand);
try expectEqual(x, math.ldexp(result.significand, result.exponent));
const x4 = math.floatTrueMin(T);
const result4 = frexp(x4);
try expectEqual(truemin_exponent, result4.exponent);
try expectEqual(0.5, result4.significand);
try expectEqual(x4, math.ldexp(result4.significand, result4.exponent));
// zero -> {zero, zero} (+)
const result5 = frexp(@as(T, 0.0));
try expectEqual(0, result5.exponent);
try expect(math.isPositiveZero(result5.significand));
// zero -> {zero, zero} (-)
const result6 = frexp(@as(T, -0.0));
try expectEqual(0, result6.exponent);
try expect(math.isNegativeZero(result6.significand));
if (T == comptime_float) return;
// infinity -> {infinity, zero} (+)
result = frexp(math.inf(T));
var result = frexp(math.inf(T));
try expectEqual(0, result.exponent);
try expect(math.isPositiveInf(result.significand));
@ -211,16 +224,6 @@ test frexp {
try expectEqual(0, result.exponent);
try expect(math.isNegativeInf(result.significand));
// zero -> {zero, zero} (+)
result = frexp(@as(T, 0.0));
try expectEqual(0, result.exponent);
try expect(math.isPositiveZero(result.significand));
// zero -> {zero, zero} (-)
result = frexp(@as(T, -0.0));
try expectEqual(0, result.exponent);
try expect(math.isNegativeZero(result.significand));
// nan -> {nan, undefined}
result = frexp(math.nan(T));
try expect(math.isNan(result.significand));

View file

@ -4,14 +4,14 @@ const expect = std.testing.expect;
/// Returns whether x is a finite value.
/// The sign bit is masked off and the remaining bits are compared against the
/// bit pattern of `math.inf(T)`; anything strictly below it is finite.
/// A `comptime_float` argument is checked as an f128.
pub fn isFinite(x: anytype) bool {
const T = @TypeOf(x);
const T = if (@TypeOf(x) == comptime_float) f128 else @TypeOf(x);
const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
// All ones except the most significant (sign) bit.
const remove_sign = ~@as(TBits, 0) >> 1;
return @as(TBits, @bitCast(x)) & remove_sign < @as(TBits, @bitCast(math.inf(T)));
return @as(TBits, @bitCast(@as(T, x))) & remove_sign < @as(TBits, @bitCast(math.inf(T)));
}
test isFinite {
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
// normals
try expect(isFinite(@as(T, 1.0)));
try expect(isFinite(-@as(T, 1.0)));
@ -25,6 +25,8 @@ test isFinite {
try expect(isFinite(math.floatMin(T)));
try expect(isFinite(math.floatMax(T)));
if (T == comptime_float) return;
// inf & nan
try expect(!isFinite(math.inf(T)));
try expect(!isFinite(-math.inf(T)));

View file

@ -4,7 +4,7 @@ const expect = std.testing.expect;
/// Returns whether x is neither zero, subnormal, infinity, or NaN.
pub fn isNormal(x: anytype) bool {
const T = @TypeOf(x);
const T = if (@TypeOf(x) == comptime_float) f128 else @TypeOf(x);
const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
const increment_exp = 1 << math.floatMantissaBits(T);
@ -15,14 +15,14 @@ pub fn isNormal(x: anytype) bool {
// The sign bit is removed because all ones would overflow into it.
// For f80, even though it has an explicit integer part stored,
// the exponent effectively takes priority if mismatching.
const value = @as(TBits, @bitCast(x)) +% increment_exp;
const value = @as(TBits, @bitCast(@as(T, x))) +% increment_exp;
return value & remove_sign >= (increment_exp << 1);
}
test isNormal {
// TODO add `c_longdouble' when math.inf(T) supports it
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
const TBits = std.meta.Int(.unsigned, @bitSizeOf(T));
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
const TBits = if (T == comptime_float) u128 else std.meta.Int(.unsigned, @bitSizeOf(T));
// normals
try expect(isNormal(@as(T, 1.0)));
@ -35,7 +35,10 @@ test isNormal {
try expect(!isNormal(@as(T, math.floatTrueMin(T))));
// largest subnormal
try expect(!isNormal(@as(T, @bitCast(~(~@as(TBits, 0) << math.floatFractionalBits(T))))));
const large_subnormal: if (T == comptime_float) f128 else T = @bitCast(~(~@as(TBits, 0) << math.floatFractionalBits(T)));
try expect(!isNormal(@as(T, large_subnormal)));
if (T == comptime_float) return;
// non-finite numbers
try expect(!isNormal(-math.inf(T)));

View file

@ -5,36 +5,50 @@ const expect = std.testing.expect;
/// Returns whether x is positive zero.
/// True only when every bit of the representation of `x` is zero.
/// A `comptime_float` argument is inspected through its f128 representation;
/// any non-float type is a compile error.
pub inline fn isPositiveZero(x: anytype) bool {
const T = @TypeOf(x);
const bit_count = @typeInfo(T).float.bits;
// bit_count: width of the representation; F: float type whose bits are inspected.
const bit_count, const F = switch (@typeInfo(T)) {
.float => |float| .{ float.bits, T },
.comptime_float => .{ 128, f128 },
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
const TBits = std.meta.Int(.unsigned, bit_count);
return @as(TBits, @bitCast(x)) == @as(TBits, 0);
return @as(TBits, @bitCast(@as(F, x))) == @as(TBits, 0);
}
/// Returns whether x is negative zero.
/// True only when the most significant bit of the representation of `x` is
/// set and every other bit is zero.
/// A `comptime_float` argument is inspected through its f128 representation;
/// any non-float type is a compile error.
pub inline fn isNegativeZero(x: anytype) bool {
const T = @TypeOf(x);
const bit_count = @typeInfo(T).float.bits;
// bit_count: width of the representation; F: float type whose bits are inspected.
const bit_count, const F = switch (@typeInfo(T)) {
.float => |float| .{ float.bits, T },
.comptime_float => .{ 128, f128 },
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
const TBits = std.meta.Int(.unsigned, bit_count);
return @as(TBits, @bitCast(x)) == @as(TBits, 1) << (bit_count - 1);
return @as(TBits, @bitCast(@as(F, x))) == @as(TBits, 1) << (bit_count - 1);
}
test isPositiveZero {
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
try expect(isPositiveZero(@as(T, 0.0)));
try expect(!isPositiveZero(@as(T, -0.0)));
try expect(!isPositiveZero(math.floatMin(T)));
try expect(!isPositiveZero(math.floatMax(T)));
if (T == comptime_float) return;
try expect(!isPositiveZero(math.inf(T)));
try expect(!isPositiveZero(-math.inf(T)));
}
}
test isNegativeZero {
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
try expect(isNegativeZero(@as(T, -0.0)));
try expect(!isNegativeZero(@as(T, 0.0)));
try expect(!isNegativeZero(math.floatMin(T)));
try expect(!isNegativeZero(math.floatMax(T)));
if (T == comptime_float) return;
try expect(!isNegativeZero(math.inf(T)));
try expect(!isNegativeZero(-math.inf(T)));
}

View file

@ -7,7 +7,11 @@ const expect = std.testing.expect;
/// Returns x * 2^n.
pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
const T = @TypeOf(x);
const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
const TBits = switch (@typeInfo(T)) {
.float => |float| std.meta.Int(.unsigned, float.bits),
.comptime_float => u128,
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
const exponent_bits = math.floatExponentBits(T);
const mantissa_bits = math.floatMantissaBits(T);
@ -16,11 +20,13 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
const max_biased_exponent = 2 * math.floatExponentMax(T);
const mantissa_mask = @as(TBits, (1 << mantissa_bits) - 1);
const repr = @as(TBits, @bitCast(x));
const repr = bitCastAs(TBits, x);
const sign_bit = repr & (1 << (exponent_bits + mantissa_bits));
if (math.isNan(x) or !math.isFinite(x))
if (math.isNan(x) or !math.isFinite(x)) {
if (T == comptime_float) unreachable;
return x;
}
var exponent: i32 = @as(i32, @intCast((repr << 1) >> (mantissa_bits + 1)));
if (exponent == 0)
@ -29,23 +35,23 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
if (n >= 0) {
if (n > max_biased_exponent - exponent) {
// Overflow. Return +/- inf
return @as(T, @bitCast(@as(TBits, @bitCast(math.inf(T))) | sign_bit));
return bitCastAs(T, bitCastAs(TBits, math.inf(T)) | sign_bit);
} else if (exponent + n <= 0) {
// Result is subnormal
return @as(T, @bitCast((repr << @as(Log2Int(TBits), @intCast(n))) | sign_bit));
return bitCastAs(T, (repr << @as(Log2Int(TBits), @intCast(n))) | sign_bit);
} else if (exponent <= 0) {
// Result is normal, but needs shifting
var result = @as(TBits, @intCast(n + exponent)) << mantissa_bits;
result |= (repr << @as(Log2Int(TBits), @intCast(1 - exponent))) & mantissa_mask;
return @as(T, @bitCast(result | sign_bit));
return bitCastAs(T, result | sign_bit);
}
// Result needs no shifting
return @as(T, @bitCast(repr + (@as(TBits, @intCast(n)) << mantissa_bits)));
return bitCastAs(T, repr + (@as(TBits, @intCast(n)) << mantissa_bits));
} else {
if (n <= -exponent) {
if (n < -(mantissa_bits + exponent))
return @as(T, @bitCast(sign_bit)); // Severe underflow. Return +/- 0
return bitCastAs(T, sign_bit); // Severe underflow. Return +/- 0
// Result underflowed, we need to shift and round
const shift = @as(Log2Int(TBits), @intCast(@min(-n, -(exponent + n) + 1)));
@ -58,14 +64,22 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
// Round result, including round-to-even for exact ties
result = ((result + 1) >> 1) & ~@as(TBits, @intFromBool(exact_tie));
return @as(T, @bitCast(result | sign_bit));
return bitCastAs(T, result | sign_bit);
}
// Result is exact, and needs no shifting
return @as(T, @bitCast(repr - (@as(TBits, @intCast(-n)) << mantissa_bits)));
return bitCastAs(T, repr - (@as(TBits, @intCast(-n)) << mantissa_bits));
}
}
/// Reinterprets the bits of `x` as a value of type `T`, like `@bitCast`, but
/// additionally accepts `comptime_float` on either side by routing it through
/// f128: a `comptime_float` input is first converted to f128, and a
/// `comptime_float` result is produced by bit-casting to f128 and widening.
inline fn bitCastAs(comptime T: type, x: anytype) T {
    const source = switch (@TypeOf(x)) {
        comptime_float => @as(f128, x),
        else => x,
    };
    return if (T == comptime_float)
        @as(T, @as(f128, @bitCast(source)))
    else
        @as(T, @bitCast(source));
}
test ldexp {
// subnormals
try expect(ldexp(@as(f16, 0x1.1FFp14), -14 - 9 - 15) == math.floatTrueMin(f16));
@ -73,6 +87,7 @@ test ldexp {
try expect(ldexp(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.floatTrueMin(f64));
try expect(ldexp(@as(f80, 0x1.7FFFFFFFFFFFFFFEp-1), -16382 - 62) == math.floatTrueMin(f80));
try expect(ldexp(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
try expect(ldexp(@as(comptime_float, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
try expect(ldexp(math.floatMax(f32), -128 - 149) > 0.0);
try expect(ldexp(math.floatMax(f32), -128 - 149 - 1) == 0.0);

View file

@ -144,7 +144,7 @@ test "int" {
}
test "float" {
@setEvalBranchQuota(4000);
@setEvalBranchQuota(5000);
// normal -> normal
try expect(nextAfter(f16, 0x1.234p0, 2.0) == 0x1.238p0);

View file

@ -31,24 +31,20 @@ const expect = std.testing.expect;
/// - pow(-inf, y) = pow(-0, -y)
/// - pow(x, y) = nan for finite x < 0 and finite non-integer y
pub fn pow(comptime T: type, x: T, y: T) T {
if (@typeInfo(T) == .int) {
const info = @typeInfo(T);
if (info == .int or info == .comptime_int)
return math.powi(T, x, y) catch unreachable;
}
if (T != f32 and T != f64) {
@compileError("pow not implemented for " ++ @typeName(T));
}
// pow(x, +-0) = 1 for all x
// pow(1, y) = 1 for all y
if (y == 0 or x == 1) {
if (y == 0 or x == 1)
return 1;
}
// pow(nan, y) = nan for all y
// pow(x, nan) = nan for all x
if (math.isNan(x) or math.isNan(y)) {
@branchHint(.unlikely);
if (info == .comptime_float) unreachable;
return math.nan(T);
}
@ -60,7 +56,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
if (x == 0) {
if (y < 0) {
// pow(+-0, y) = +-inf for y an odd integer
if (isOddInteger(y)) {
if (isOddInteger(T, y)) {
return math.copysign(math.inf(T), x);
}
// pow(+-0, y) = +inf for y an even integer
@ -68,7 +64,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
return math.inf(T);
}
} else {
if (isOddInteger(y)) {
if (isOddInteger(T, y)) {
return x;
} else {
return 0;
@ -77,6 +73,9 @@ pub fn pow(comptime T: type, x: T, y: T) T {
}
if (math.isInf(y)) {
@branchHint(.unlikely);
if (info == .comptime_float) unreachable;
// pow(-1, inf) = 1 for all x
if (x == -1) {
return 1.0;
@ -94,6 +93,9 @@ pub fn pow(comptime T: type, x: T, y: T) T {
}
if (math.isInf(x)) {
@branchHint(.unlikely);
if (info == .comptime_float) unreachable;
if (math.isNegativeInf(x)) {
return pow(T, 1 / x, -y);
}
@ -145,7 +147,12 @@ pub fn pow(comptime T: type, x: T, y: T) T {
var xe = r2.exponent;
var x1 = r2.significand;
var i = @as(std.meta.Int(.signed, @typeInfo(T).float.bits), @intFromFloat(yi));
const Int = switch (info) {
.float => |float| std.meta.Int(.signed, float.bits),
.comptime_float => i128,
else => @compileError("pow not implemented for " ++ @typeName(T)),
};
var i = @as(Int, @intFromFloat(yi));
while (i != 0) : (i >>= 1) {
const overflow_shift = math.floatExponentBits(T) + 1;
if (xe < -(1 << overflow_shift) or (1 << overflow_shift) < xe) {
@ -178,25 +185,37 @@ pub fn pow(comptime T: type, x: T, y: T) T {
return math.scalbn(a1, ae);
}
/// Returns whether `x` is an odd integer.
///
/// `T` must be a float type or `comptime_float`. Any value whose magnitude is
/// at least `1 << digits`, where `digits` is the number of fractional bits of
/// `T` plus one, is reported as even (returns false). Non-integer values
/// return false.
fn isOddInteger(comptime T: type, x: T) bool {
    // standard IEEE floats have an implicit 0.m or 1.m integer part
    // so the number of digits is the number of fractional bits + 1
    const digits = math.floatFractionalBits(T) + 1;
    // The bound is 2^digits, not `digits` itself: comparing against the bare
    // bit count would misreport every odd integer >= digits (e.g. 55.0 for f64).
    if (@abs(x) >= 1 << digits) {
        // From https://golang.org/src/math/pow.go
        // 1 << digits is the largest exact integer in the IEEE float format fN.
        // Any number outside this range will be truncated before the decimal point and therefore will always be
        // an even integer.
        // Without this check and if x overflows iN the @intFromFloat(r.ipart) conversion below will panic
        return false;
    }

    const r = math.modf(x);
    // Bail out before the integer conversion so that values with a fractional
    // part never reach @intFromFloat.
    if (r.fpart != 0.0) return false;

    const Int = switch (@typeInfo(T)) {
        .float => |float| std.meta.Int(.signed, float.bits),
        .comptime_float => i128,
        else => unreachable,
    };
    const ipart: Int = @intFromFloat(r.ipart);
    return ipart & 1 == 1;
}

test isOddInteger {
    try expect(isOddInteger(f128, @floatFromInt(math.maxInt(i64) * 2)) == false);
    // 2^64 - 1 fits in the 113 significant bits used for comptime_float, so it is a genuine odd integer.
    try expect(isOddInteger(comptime_float, @floatFromInt(math.maxInt(i64) * 2 + 1)) == true);
    try expect(isOddInteger(comptime_float, @floatFromInt(1 << 113)) == false);
    try expect(isOddInteger(f64, 1 << 53) == false);
    try expect(isOddInteger(f32, 1 << 24) == false);
    try expect(isOddInteger(f80, 12.0) == false);
    try expect(isOddInteger(f80, 15.0) == true);
    try expect(isOddInteger(f32, 5.0) == true);
    try expect(isOddInteger(f16, -1.0) == true);
    // odd integers at or above `digits` but below `1 << digits`
    try expect(isOddInteger(f64, 55.0) == true);
    try expect(isOddInteger(f16, 11.0) == true);
    try expect(isOddInteger(f64, 54.0) == false);
    try expect(isOddInteger(f32, 2.5) == false);
}
test pow {

View file

@ -8,30 +8,42 @@ const math = std.math;
const assert = std.debug.assert;
const testing = std.testing;
const UnsignedError = error{Overflow};
const SignedError = error{
Overflow,
Underflow,
DivisionByZero,
};
/// Returns the power of x raised by the integer y (x^y).
///
/// Errors:
/// - Overflow: Integer overflow or Infinity
/// - Overflow: Integer overflow
/// - Underflow: Absolute value of result smaller than 1
/// - DivisionByZero: Undefined power.
///
/// Edge case rules ordered by precedence:
/// - powi(T, x, 0) = 1 unless T is i1, i0, u0
/// - powi(T, 0, x) = 0 when x > 0
/// - powi(T, 0, x) = Overflow
/// - powi(T, 0, x) = DivisionByZero
/// - powi(T, 1, y) = 1
/// - powi(T, -1, y) = -1 for y an odd integer
/// - powi(T, -1, y) = 1 unless T is i1, i0, u0
/// - powi(T, -1, y) = Overflow
/// - powi(T, x, y) = Overflow when y >= @bitSizeOf(x)
/// - powi(T, x, y) = Underflow when y < 0
pub fn powi(comptime T: type, x: T, y: T) (error{
Overflow,
Underflow,
}!T) {
const bit_size = @typeInfo(T).int.bits;
pub fn powi(comptime T: type, x: T, y: T) (if (@typeInfo(T) == .int and @typeInfo(T).int.signedness == .unsigned)
UnsignedError
else
SignedError)!T {
const info = @typeInfo(T);
if (info != .int and info != .comptime_int)
@compileError("powi not implemented for " ++ @typeName(T));
const is_unsigned = info == .int and info.int.signedness == .unsigned;
// `y & 1 == 0` won't compile when `does_one_overflow`.
const does_one_overflow = math.maxInt(T) < 1;
const does_one_overflow = info == .int and math.maxInt(T) < 1;
const is_y_even = !does_one_overflow and y & 1 == 0;
if (x == 1 or y == 0 or (x == -1 and is_y_even)) {
@ -50,15 +62,17 @@ pub fn powi(comptime T: type, x: T, y: T) (error{
if (y > 0) {
return 0;
} else {
// Infinity/NaN, not overflow in strict sense
return error.Overflow;
if (is_unsigned) unreachable;
return error.DivisionByZero;
}
}
// x >= 2 or x <= -2 from this point
if (y >= bit_size) {
if (info == .int and y >= info.int.bits) {
return error.Overflow;
}
if (y < 0) {
if (is_unsigned) unreachable;
return error.Underflow;
}
@ -71,27 +85,32 @@ pub fn powi(comptime T: type, x: T, y: T) (error{
while (exp > 1) {
if (exp & 1 == 1) {
const ov = @mulWithOverflow(acc, base);
if (ov[1] != 0) return error.Overflow;
acc = ov[0];
acc = try mul(T, acc, base);
}
exp >>= 1;
const ov = @mulWithOverflow(base, base);
if (ov[1] != 0) return error.Overflow;
base = ov[0];
base = try mul(T, base, base);
}
if (exp == 1) {
const ov = @mulWithOverflow(acc, base);
if (ov[1] != 0) return error.Overflow;
acc = ov[0];
acc = try mul(T, acc, base);
}
return acc;
}
/// Multiplies `x` by `y`, returning `error.Overflow` instead of wrapping.
///
/// - Fixed-width integers: the product is checked with `@mulWithOverflow`.
/// - `comptime_int`: multiplied directly; this path never returns an error.
/// - Any other `T` is rejected at compile time.
inline fn mul(comptime T: type, x: T, y: T) error{Overflow}!T {
    switch (@typeInfo(T)) {
        .int => {
            const product, const overflowed = @mulWithOverflow(x, y);
            return if (overflowed != 0) error.Overflow else product;
        },
        .comptime_int => return x * y,
        // The switch operand is comptime-known, so this prong is only analyzed
        // for unsupported types; fail the build rather than trap at runtime.
        else => @compileError("mul not implemented for " ++ @typeName(T)),
    }
}
test powi {
try testing.expectError(error.Overflow, powi(i8, -66, 6));
try testing.expectError(error.Overflow, powi(i16, -13, 13));
@ -106,6 +125,8 @@ test powi {
try testing.expect((try powi(i64, -36, 6)) == 2176782336);
try testing.expect((try powi(i17, -2, 15)) == -32768);
try testing.expect((try powi(i42, -5, 7)) == -78125);
try testing.expect((try powi(comptime_int, -12345, 11)) == -1014850422703912515858714960329315071728515625);
try comptime testing.expect((try powi(comptime_int, 13, 5)) == 371293);
try testing.expect((try powi(u8, 6, 2)) == 36);
try testing.expect((try powi(u16, 5, 4)) == 625);
@ -113,6 +134,8 @@ test powi {
try testing.expect((try powi(u64, 34, 2)) == 1156);
try testing.expect((try powi(u17, 16, 3)) == 4096);
try testing.expect((try powi(u42, 34, 6)) == 1544804416);
try testing.expect((try powi(comptime_int, 54321, 9)) == 4118222497610732111054528594901610509007281);
try comptime testing.expect((try powi(comptime_int, 51, 3)) == 132651);
try testing.expectError(error.Overflow, powi(i8, 120, 7));
try testing.expectError(error.Overflow, powi(i16, 73, 15));
@ -157,6 +180,8 @@ test "powi.special" {
try testing.expect((try powi(i64, -1, 6)) == 1);
try testing.expect((try powi(i17, -1, 15)) == -1);
try testing.expect((try powi(i42, -1, 7)) == -1);
try testing.expect((try powi(comptime_int, -1, 5)) == -1);
try comptime testing.expect((try powi(comptime_int, -1, 3)) == -1);
try testing.expect((try powi(u8, 1, 2)) == 1);
try testing.expect((try powi(u16, 1, 4)) == 1);
@ -185,6 +210,8 @@ test "powi.special" {
try testing.expect((try powi(u64, 34, 0)) == 1);
try testing.expect((try powi(u17, 16, 0)) == 1);
try testing.expect((try powi(u42, 34, 0)) == 1);
try testing.expect((try powi(comptime_int, 41, 0)) == 1);
try comptime testing.expect((try powi(comptime_int, 43, 0)) == 1);
}
test "powi.narrow" {
@ -192,6 +219,6 @@ test "powi.narrow" {
try testing.expectError(error.Overflow, powi(i0, 0, 0));
try testing.expectError(error.Overflow, powi(i1, 0, 0));
try testing.expectError(error.Overflow, powi(i1, -1, 0));
try testing.expectError(error.Overflow, powi(i1, 0, -1));
try testing.expectError(error.DivisionByZero, powi(i1, 0, -1));
try testing.expect((try powi(i1, -1, -1)) == -1);
}

View file

@ -43,6 +43,9 @@ fn testFloats(comptime Type: type) !void {
try expect(!signbit(@as(Type, 1.0)));
try expect(signbit(@as(Type, -2.0)));
try expect(signbit(@as(Type, -0.0)));
if (Type == comptime_float) return;
try expect(!signbit(math.inf(Type)));
try expect(signbit(-math.inf(Type)));
try expect(!signbit(math.nan(Type)));

View file

@ -290,19 +290,16 @@ pub inline fn expectApproxEqAbs(expected: anytype, actual: anytype, tolerance: a
fn expectApproxEqAbsInner(comptime T: type, expected: T, actual: T, tolerance: T) !void {
switch (@typeInfo(T)) {
.float => if (!math.approxEqAbs(T, expected, actual, tolerance)) {
.float, .comptime_float => if (!math.approxEqAbs(T, expected, actual, tolerance)) {
print("actual {}, not within absolute tolerance {} of expected {}\n", .{ actual, tolerance, expected });
return error.TestExpectedApproxEqAbs;
},
.comptime_float => @compileError("Cannot approximately compare two comptime_float values"),
else => @compileError("Unable to compare non floating point values"),
}
}
test expectApproxEqAbs {
inline for ([_]type{ f16, f32, f64, f128 }) |T| {
inline for ([_]type{ f16, f32, f64, f128, comptime_float }) |T| {
const pos_x: T = 12.0;
const pos_y: T = 12.06;
const neg_x: T = -12.0;
@ -326,19 +323,16 @@ pub inline fn expectApproxEqRel(expected: anytype, actual: anytype, tolerance: a
fn expectApproxEqRelInner(comptime T: type, expected: T, actual: T, tolerance: T) !void {
switch (@typeInfo(T)) {
.float => if (!math.approxEqRel(T, expected, actual, tolerance)) {
.float, .comptime_float => if (!math.approxEqRel(T, expected, actual, tolerance)) {
print("actual {}, not within relative tolerance {} of expected {}\n", .{ actual, tolerance, expected });
return error.TestExpectedApproxEqRel;
},
.comptime_float => @compileError("Cannot approximately compare two comptime_float values"),
else => @compileError("Unable to compare non floating point values"),
}
}
test expectApproxEqRel {
inline for ([_]type{ f16, f32, f64, f128 }) |T| {
inline for ([_]type{ f16, f32, f64, f128, comptime_float }) |T| {
const eps_value = comptime math.floatEps(T);
const sqrt_eps_value = comptime @sqrt(eps_value);

View file

@ -176,7 +176,7 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
try testArgs(u1025, 0x1dea81169800bac2f3afcf3be5dbd2d8eefbace8a24a2da0a383a928d1109459f34028be4413119f1af00ad90ce4d63064016dc1cee5b783c79c1998a0a49de21c4db71d432273576503589fc966c7ec2d730fa9bc4c5ff3128a82653ab8149528de67804718e39722f89b91c75d012ea41c642c889f0db95c882a9790a5e922f, 0x156fe02946ab9069a644dcc1f2b1afa04ee88ab1de19575a2715abf4a52bf374d297fdf78455ccdb87a934d3d818d774b63865eaedfdad3c56a56b8fcc62703c391aedf16cf770af06d7d205f93778c012df54fe5290084e1cd2bbec86a2f295cdce69a2cd774e064580f3c9cfae60d17b12f610e86566e68d5183d706c8ad8af);
}
fn testFloats() !void {
@setEvalBranchQuota(21_700);
@setEvalBranchQuota(25_000);
try testArgs(f16, -nan(f16), -nan(f16));
try testArgs(f16, -nan(f16), -inf(f16));
@ -4646,7 +4646,7 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
});
}
fn testFloatVectors() !void {
@setEvalBranchQuota(21_700);
@setEvalBranchQuota(25_000);
try testArgs(@Vector(1, f16), .{
-tmin(f16),

View file

@ -3133,7 +3133,7 @@ fn cast(comptime op: anytype, comptime opts: struct { compare: Compare = .relaxe
try testArgs(i1024, u1025, 1 << 1024);
}
fn testFloats() !void {
@setEvalBranchQuota(3_100);
@setEvalBranchQuota(3_500);
try testArgs(f16, f16, -nan(f16));
try testArgs(f16, f16, -inf(f16));
@ -6387,7 +6387,7 @@ fn cast(comptime op: anytype, comptime opts: struct { compare: Compare = .relaxe
try testArgs(@Vector(3, i1024), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
}
fn testFloatVectors() !void {
@setEvalBranchQuota(6_700);
@setEvalBranchQuota(7_500);
try testArgs(@Vector(1, f16), @Vector(1, f16), .{
1e0,
@ -6890,7 +6890,7 @@ fn cast(comptime op: anytype, comptime opts: struct { compare: Compare = .relaxe
});
}
fn testIntsFromFloats() !void {
@setEvalBranchQuota(2_600);
@setEvalBranchQuota(2_700);
try testArgs(i8, f16, -0x0.8p8);
try testArgs(i8, f16, next(f16, -0x0.8p8, -0.0));