mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
Merge 109dc4680f into 9473011052
This commit is contained in:
commit
f66ea7f383
16 changed files with 288 additions and 193 deletions
|
|
@ -540,10 +540,7 @@ pub fn parseIntSizeSuffix(buf: []const u8, digit_base: u8) ParseIntError!usize {
|
|||
} else if (without_i.len != without_B.len) {
|
||||
return error.InvalidCharacter;
|
||||
}
|
||||
const multiplier = math.powi(usize, magnitude_base, orders_of_magnitude) catch |err| switch (err) {
|
||||
error.Underflow => unreachable,
|
||||
error.Overflow => return error.Overflow,
|
||||
};
|
||||
const multiplier = try math.powi(usize, magnitude_base, orders_of_magnitude);
|
||||
const number = try std.fmt.parseInt(usize, without_suffix, digit_base);
|
||||
return math.mul(usize, number, multiplier);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,8 +81,10 @@ pub fn approxEqAbs(comptime T: type, x: T, y: T, tolerance: T) bool {
|
|||
if (x == y)
|
||||
return true;
|
||||
|
||||
if (isNan(x) or isNan(y))
|
||||
if (isNan(x) or isNan(y)) {
|
||||
if (T == comptime_float) unreachable;
|
||||
return false;
|
||||
}
|
||||
|
||||
return @abs(x - y) <= tolerance;
|
||||
}
|
||||
|
|
@ -109,8 +111,10 @@ pub fn approxEqRel(comptime T: type, x: T, y: T, tolerance: T) bool {
|
|||
if (x == y)
|
||||
return true;
|
||||
|
||||
if (isNan(x) or isNan(y))
|
||||
if (isNan(x) or isNan(y)) {
|
||||
if (T == comptime_float) unreachable;
|
||||
return false;
|
||||
}
|
||||
|
||||
return @abs(x - y) <= @max(@abs(x), @abs(y)) * tolerance;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -484,6 +484,7 @@ fn toFloat(comptime Float: type) !void {
|
|||
);
|
||||
}
|
||||
test toFloat {
|
||||
@setEvalBranchQuota(1_100);
|
||||
if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/24191
|
||||
try toFloat(f16);
|
||||
try toFloat(f32);
|
||||
|
|
|
|||
|
|
@ -117,21 +117,28 @@ pub fn FloatRepr(comptime Float: type) type {
|
|||
|
||||
/// Returns the raw mantissa bits encoding an explicit integer part of "1.0" for float type T.
/// Only the 80-bit float yields a non-zero value; every other float type, and comptime_float,
/// yields 0. Used to dedupe f80 logic.
inline fn mantissaOne(comptime T: type) comptime_int {
    // comptime_float is tested first so that `.float` is only read for sized float types.
    const stores_integer_bit = T != comptime_float and @typeInfo(T).float.bits == 80;
    return if (stores_integer_bit) 1 << floatFractionalBits(T) else 0;
}
|
||||
|
||||
/// Creates floating point type T from an unbiased exponent and raw mantissa.
///
/// comptime_float is routed through f128: the assembled 128-bit pattern is bit-cast to
/// f128 and the result is then coerced to T.
inline fn reconstructFloat(comptime T: type, comptime exponent: comptime_int, comptime mantissa: comptime_int) T {
    // UBits is the unsigned integer holding the encoding; FBits is the float type that is
    // actually bit-cast before the final coercion to T.
    const UBits, const FBits = switch (@typeInfo(T)) {
        .float => |float| .{ std.meta.Int(.unsigned, float.bits), T },
        .comptime_float => .{ std.meta.Int(.unsigned, 128), f128 },
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
    const biased_exponent = @as(UBits, exponent + floatExponentMax(T));
    return @as(T, @as(FBits, @bitCast((biased_exponent << floatMantissaBits(T)) | @as(UBits, mantissa))));
}
|
||||
|
||||
/// Returns the number of bits in the exponent of floating point type T.
|
||||
pub inline fn floatExponentBits(comptime T: type) comptime_int {
|
||||
comptime assert(@typeInfo(T) == .float);
|
||||
const info = @typeInfo(T);
|
||||
comptime assert(info == .float or info == .comptime_float);
|
||||
|
||||
return switch (@typeInfo(T).float.bits) {
|
||||
if (info == .comptime_float) return 15;
|
||||
return switch (info.float.bits) {
|
||||
16 => 5,
|
||||
32 => 8,
|
||||
64 => 11,
|
||||
|
|
@ -143,9 +150,11 @@ pub inline fn floatExponentBits(comptime T: type) comptime_int {
|
|||
|
||||
/// Returns the number of bits in the mantissa of floating point type T.
|
||||
pub inline fn floatMantissaBits(comptime T: type) comptime_int {
|
||||
comptime assert(@typeInfo(T) == .float);
|
||||
const info = @typeInfo(T);
|
||||
comptime assert(info == .float or info == .comptime_float);
|
||||
|
||||
return switch (@typeInfo(T).float.bits) {
|
||||
if (info == .comptime_float) return 112;
|
||||
return switch (info.float.bits) {
|
||||
16 => 10,
|
||||
32 => 23,
|
||||
64 => 52,
|
||||
|
|
@ -157,12 +166,14 @@ pub inline fn floatMantissaBits(comptime T: type) comptime_int {
|
|||
|
||||
/// Returns the number of fractional bits in the mantissa of floating point type T.
|
||||
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
|
||||
comptime assert(@typeInfo(T) == .float);
|
||||
const info = @typeInfo(T);
|
||||
comptime assert(info == .float or info == .comptime_float);
|
||||
|
||||
// standard IEEE floats have an implicit 0.m or 1.m integer part
|
||||
// f80 is special and has an explicitly stored bit in the MSB
|
||||
// this function corresponds to `MANT_DIG - 1' from C
|
||||
return switch (@typeInfo(T).float.bits) {
|
||||
if (info == .comptime_float) return 112;
|
||||
return switch (info.float.bits) {
|
||||
16 => 10,
|
||||
32 => 23,
|
||||
64 => 52,
|
||||
|
|
@ -208,36 +219,37 @@ pub inline fn floatEps(comptime T: type) T {
|
|||
/// Returns the local epsilon of floating point type T.
///
/// The result is the absolute difference between `x` and the value whose encoding differs
/// from that of `x` only in the lowest bit. For comptime_float the f128 encoding is used.
/// Any non-float T is rejected at compile time.
pub inline fn floatEpsAt(comptime T: type, x: T) T {
    switch (@typeInfo(T)) {
        .float => |float| {
            const U = std.meta.Int(.unsigned, float.bits);
            const u: U = @bitCast(x);
            // Flip the least significant bit to obtain the neighbouring encoding.
            const y: T = @bitCast(u ^ 1);
            return @abs(x - y);
        },
        .comptime_float => {
            const u: u128 = @bitCast(@as(f128, x));
            const y: f128 = @bitCast(u ^ 1);
            return @as(comptime_float, @abs(x - y));
        },
        else => @compileError("floatEpsAt only supports floats"),
    }
}
|
||||
|
||||
/// Returns the inf value for a floating point `Type`.
///
/// `comptime_float` and non-float types are rejected with a compile error.
pub inline fn inf(comptime Type: type) Type {
    return switch (@typeInfo(Type)) {
        // Exponent one past floatExponentMax; the mantissa carries only what
        // mantissaOne contributes.
        .float => reconstructFloat(Type, floatExponentMax(Type) + 1, mantissaOne(Type)),
        .comptime_float => @compileError("comptime_float cannot be infinity"),
        else => @compileError("unknown floating point type " ++ @typeName(Type)),
    };
}
|
||||
|
||||
/// Returns the canonical quiet NaN representation for a floating point `Type`.
///
/// `comptime_float` and non-float types are rejected with a compile error.
pub inline fn nan(comptime Type: type) Type {
    return switch (@typeInfo(Type)) {
        // Same exponent as inf, with the highest fractional bit set.
        .float => reconstructFloat(
            Type,
            floatExponentMax(Type) + 1,
            mantissaOne(Type) | (1 << (floatFractionalBits(Type) - 1)),
        ),
        .comptime_float => @compileError("comptime_float cannot be NaN"),
        else => @compileError("unknown floating point type " ++ @typeName(Type)),
    };
}
|
||||
|
||||
/// Returns a signalling NaN representation for a floating point `Type`.
|
||||
|
|
@ -245,21 +257,20 @@ pub inline fn nan(comptime Type: type) Type {
|
|||
/// TODO: LLVM is known to miscompile on some architectures to quiet NaN -
|
||||
/// this is tracked by https://github.com/ziglang/zig/issues/14366
|
||||
pub inline fn snan(comptime Type: type) Type {
    return switch (@typeInfo(Type)) {
        // Same exponent as inf, with the second-highest fractional bit set
        // (one position below the bit used by `nan`).
        .float => reconstructFloat(
            Type,
            floatExponentMax(Type) + 1,
            mantissaOne(Type) | (1 << (floatFractionalBits(Type) - 2)),
        ),
        // comptime_float and non-float types are rejected at compile time.
        .comptime_float => @compileError("comptime_float cannot be NaN"),
        else => @compileError("unknown floating point type " ++ @typeName(Type)),
    };
}
|
||||
|
||||
fn floatBits(comptime Type: type) !void {
|
||||
// (1 +) for the sign bit, since it is separate from the other bits
|
||||
const size = 1 + floatExponentBits(Type) + floatMantissaBits(Type);
|
||||
try expect(@bitSizeOf(Type) == size);
|
||||
if (@typeInfo(Type) == .float)
|
||||
try expect(@bitSizeOf(Type) == size)
|
||||
else
|
||||
try expect(128 == size);
|
||||
try expect(floatFractionalBits(Type) <= floatMantissaBits(Type));
|
||||
|
||||
// for machine epsilon, assert expmin <= -prec <= expmax
|
||||
|
|
@ -273,6 +284,8 @@ test floatBits {
|
|||
try floatBits(f80);
|
||||
try floatBits(f128);
|
||||
try floatBits(c_longdouble);
|
||||
try floatBits(comptime_float);
|
||||
try comptime floatBits(comptime_float);
|
||||
}
|
||||
|
||||
test inf {
|
||||
|
|
@ -281,11 +294,11 @@ test inf {
|
|||
const inf_u64: u64 = 0x7FF0000000000000;
|
||||
const inf_u80: u80 = 0x7FFF8000000000000000;
|
||||
const inf_u128: u128 = 0x7FFF0000000000000000000000000000;
|
||||
try expectEqual(inf_u16, @as(u16, @bitCast(inf(f16))));
|
||||
try expectEqual(inf_u32, @as(u32, @bitCast(inf(f32))));
|
||||
try expectEqual(inf_u64, @as(u64, @bitCast(inf(f64))));
|
||||
try expectEqual(inf_u80, @as(u80, @bitCast(inf(f80))));
|
||||
try expectEqual(inf_u128, @as(u128, @bitCast(inf(f128))));
|
||||
try expect(inf_u16 == @as(u16, @bitCast(inf(f16))));
|
||||
try expect(inf_u32 == @as(u32, @bitCast(inf(f32))));
|
||||
try expect(inf_u64 == @as(u64, @bitCast(inf(f64))));
|
||||
try expect(inf_u80 == @as(u80, @bitCast(inf(f80))));
|
||||
try expect(inf_u128 == @as(u128, @bitCast(inf(f128))));
|
||||
}
|
||||
|
||||
test nan {
|
||||
|
|
@ -294,11 +307,11 @@ test nan {
|
|||
const qnan_u64: u64 = 0x7FF8000000000000;
|
||||
const qnan_u80: u80 = 0x7FFFC000000000000000;
|
||||
const qnan_u128: u128 = 0x7FFF8000000000000000000000000000;
|
||||
try expectEqual(qnan_u16, @as(u16, @bitCast(nan(f16))));
|
||||
try expectEqual(qnan_u32, @as(u32, @bitCast(nan(f32))));
|
||||
try expectEqual(qnan_u64, @as(u64, @bitCast(nan(f64))));
|
||||
try expectEqual(qnan_u80, @as(u80, @bitCast(nan(f80))));
|
||||
try expectEqual(qnan_u128, @as(u128, @bitCast(nan(f128))));
|
||||
try expect(qnan_u16 == @as(u16, @bitCast(nan(f16))));
|
||||
try expect(qnan_u32 == @as(u32, @bitCast(nan(f32))));
|
||||
try expect(qnan_u64 == @as(u64, @bitCast(nan(f64))));
|
||||
try expect(qnan_u80 == @as(u80, @bitCast(nan(f80))));
|
||||
try expect(qnan_u128 == @as(u128, @bitCast(nan(f128))));
|
||||
}
|
||||
|
||||
test snan {
|
||||
|
|
@ -307,9 +320,9 @@ test snan {
|
|||
const snan_u64: u64 = 0x7FF4000000000000;
|
||||
const snan_u80: u80 = 0x7FFFA000000000000000;
|
||||
const snan_u128: u128 = 0x7FFF4000000000000000000000000000;
|
||||
try expectEqual(snan_u16, @as(u16, @bitCast(snan(f16))));
|
||||
try expectEqual(snan_u32, @as(u32, @bitCast(snan(f32))));
|
||||
try expectEqual(snan_u64, @as(u64, @bitCast(snan(f64))));
|
||||
try expectEqual(snan_u80, @as(u80, @bitCast(snan(f80))));
|
||||
try expectEqual(snan_u128, @as(u128, @bitCast(snan(f128))));
|
||||
try expect(snan_u16 == @as(u16, @bitCast(snan(f16))));
|
||||
try expect(snan_u32 == @as(u32, @bitCast(snan(f32))));
|
||||
try expect(snan_u64 == @as(u64, @bitCast(snan(f64))));
|
||||
try expect(snan_u80 == @as(u80, @bitCast(snan(f80))));
|
||||
try expect(snan_u128 == @as(u128, @bitCast(snan(f128))));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
const std = @import("../std.zig");
|
||||
const math = std.math;
|
||||
const assert = std.debug.assert;
|
||||
const expect = std.testing.expect;
|
||||
const expectEqual = std.testing.expectEqual;
|
||||
const expectApproxEqAbs = std.testing.expectApproxEqAbs;
|
||||
|
|
@ -20,8 +21,10 @@ pub fn Frexp(comptime T: type) type {
|
|||
/// - frexp(nan) = nan, undefined
|
||||
pub fn frexp(x: anytype) Frexp(@TypeOf(x)) {
|
||||
const T: type = @TypeOf(x);
|
||||
const info = @typeInfo(T);
|
||||
comptime assert(info == .float or info == .comptime_float);
|
||||
|
||||
const bits: comptime_int = @typeInfo(T).float.bits;
|
||||
const bits: comptime_int = if (info == .float) info.float.bits else 128;
|
||||
const Int: type = std.meta.Int(.unsigned, bits);
|
||||
|
||||
const exp_bits: comptime_int = math.floatExponentBits(T);
|
||||
|
|
@ -43,7 +46,7 @@ pub fn frexp(x: anytype) Frexp(@TypeOf(x)) {
|
|||
const extra_denorm_shift: comptime_int = 1 - ones_place;
|
||||
|
||||
var result: Frexp(T) = undefined;
|
||||
var v: Int = @bitCast(x);
|
||||
var v: Int = if (info == .float) @bitCast(x) else @bitCast(@as(f128, x));
|
||||
|
||||
const m: MantInt = @truncate(v);
|
||||
const e: ExpInt = @truncate(v >> mant_bits);
|
||||
|
|
@ -81,7 +84,7 @@ pub fn frexp(x: anytype) Frexp(@TypeOf(x)) {
|
|||
},
|
||||
}
|
||||
|
||||
result.significand = @bitCast(v);
|
||||
result.significand = if (info == .float) @bitCast(v) else @as(f128, @bitCast(v));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -91,23 +94,22 @@ fn FrexpTests(comptime Float: type) type {
|
|||
const T = Float;
|
||||
test "normal" {
    // Each case binds its own immutable result rather than reusing one mutable variable.
    const epsilon = 1e-6;

    const r1 = frexp(@as(T, 1.3));
    try expectApproxEqAbs(0.65, r1.significand, epsilon);
    try expectEqual(1, r1.exponent);

    const r2 = frexp(@as(T, 78.0234));
    try expectApproxEqAbs(0.609558, r2.significand, epsilon);
    try expectEqual(7, r2.exponent);

    const r3 = frexp(@as(T, -1234.5678));
    try expectEqual(11, r3.exponent);
    try expectApproxEqAbs(-0.602816, r3.significand, epsilon);
}
|
||||
test "max" {
|
||||
const exponent = math.floatExponentMax(T) + 1;
|
||||
const significand = 1.0 - math.floatEps(T) / 2;
|
||||
const significand = 1.0 - math.floatEps(T) / 2.0;
|
||||
const r: Frexp(T) = frexp(math.floatMax(T));
|
||||
try expectEqual(exponent, r.exponent);
|
||||
try expectEqual(significand, r.significand);
|
||||
|
|
@ -126,17 +128,16 @@ fn FrexpTests(comptime Float: type) type {
|
|||
try expectEqual(0.5, r.significand);
|
||||
}
|
||||
test "zero" {
    // Positive zero: exponent 0 and a positive-zero significand.
    const r1 = frexp(@as(T, 0.0));
    try expectEqual(0, r1.exponent);
    try expect(math.isPositiveZero(r1.significand));

    // Negative zero: exponent 0 and the sign carried over to the significand.
    const r2 = frexp(@as(T, -0.0));
    try expectEqual(0, r2.exponent);
    try expect(math.isNegativeZero(r2.significand));
}
|
||||
test "inf" {
|
||||
if (T == comptime_float) return;
|
||||
var r: Frexp(T) = undefined;
|
||||
|
||||
r = frexp(math.inf(T));
|
||||
|
|
@ -148,6 +149,7 @@ fn FrexpTests(comptime Float: type) type {
|
|||
try expect(math.isNegativeInf(r.significand));
|
||||
}
|
||||
test "nan" {
    // The check is skipped entirely when T is comptime_float.
    if (T != comptime_float) {
        const decomposed: Frexp(T) = frexp(math.nan(T));
        try expect(math.isNan(decomposed.significand));
    }
}
|
||||
|
|
@ -156,53 +158,64 @@ fn FrexpTests(comptime Float: type) type {
|
|||
|
||||
// Generate tests for each floating point type, including comptime_float.
comptime {
    for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
        // Referencing the returned type is what pulls its nested tests into the build.
        _ = FrexpTests(T);
    }
}
|
||||
|
||||
test frexp {
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
|
||||
@setEvalBranchQuota(1_500);
|
||||
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
|
||||
const max_exponent = math.floatExponentMax(T) + 1;
|
||||
const min_exponent = math.floatExponentMin(T) + 1;
|
||||
const truemin_exponent = min_exponent - math.floatFractionalBits(T);
|
||||
|
||||
var result: Frexp(T) = undefined;
|
||||
comptime var x: T = undefined;
|
||||
|
||||
// basic usage
|
||||
// value -> {significand, exponent},
|
||||
// value == significand * (2 ^ exponent)
|
||||
x = 1234.5678;
|
||||
result = frexp(x);
|
||||
try expectEqual(11, result.exponent);
|
||||
try expectApproxEqAbs(0.602816, result.significand, 1e-6);
|
||||
try expectEqual(x, math.ldexp(result.significand, result.exponent));
|
||||
const x1 = 1234.5678;
|
||||
const result1 = frexp(x1);
|
||||
try expectEqual(11, result1.exponent);
|
||||
try expectApproxEqAbs(0.602816, result1.significand, 1e-6);
|
||||
try expectEqual(x1, math.ldexp(result1.significand, result1.exponent));
|
||||
|
||||
// float maximum
|
||||
x = math.floatMax(T);
|
||||
result = frexp(x);
|
||||
try expectEqual(max_exponent, result.exponent);
|
||||
try expectEqual(1.0 - math.floatEps(T) / 2, result.significand);
|
||||
try expectEqual(x, math.ldexp(result.significand, result.exponent));
|
||||
const x2 = math.floatMax(T);
|
||||
const result2 = frexp(x2);
|
||||
try expectEqual(max_exponent, result2.exponent);
|
||||
try expectEqual(1.0 - math.floatEps(T) / 2.0, result2.significand);
|
||||
try expectEqual(x2, math.ldexp(result2.significand, result2.exponent));
|
||||
|
||||
// float minimum
|
||||
x = math.floatMin(T);
|
||||
result = frexp(x);
|
||||
try expectEqual(min_exponent, result.exponent);
|
||||
try expectEqual(0.5, result.significand);
|
||||
try expectEqual(x, math.ldexp(result.significand, result.exponent));
|
||||
const x3 = math.floatMin(T);
|
||||
const result3 = frexp(x3);
|
||||
try expectEqual(min_exponent, result3.exponent);
|
||||
try expectEqual(0.5, result3.significand);
|
||||
try expectEqual(x3, math.ldexp(result3.significand, result3.exponent));
|
||||
|
||||
// float true minimum
|
||||
// subnormal -> {normal, exponent}
|
||||
x = math.floatTrueMin(T);
|
||||
result = frexp(x);
|
||||
try expectEqual(truemin_exponent, result.exponent);
|
||||
try expectEqual(0.5, result.significand);
|
||||
try expectEqual(x, math.ldexp(result.significand, result.exponent));
|
||||
const x4 = math.floatTrueMin(T);
|
||||
const result4 = frexp(x4);
|
||||
try expectEqual(truemin_exponent, result4.exponent);
|
||||
try expectEqual(0.5, result4.significand);
|
||||
try expectEqual(x4, math.ldexp(result4.significand, result4.exponent));
|
||||
|
||||
// zero -> {zero, zero} (+)
|
||||
const result5 = frexp(@as(T, 0.0));
|
||||
try expectEqual(0, result5.exponent);
|
||||
try expect(math.isPositiveZero(result5.significand));
|
||||
|
||||
// zero -> {zero, zero} (-)
|
||||
const result6 = frexp(@as(T, -0.0));
|
||||
try expectEqual(0, result6.exponent);
|
||||
try expect(math.isNegativeZero(result6.significand));
|
||||
|
||||
if (T == comptime_float) return;
|
||||
|
||||
// infinity -> {infinity, zero} (+)
|
||||
result = frexp(math.inf(T));
|
||||
var result = frexp(math.inf(T));
|
||||
try expectEqual(0, result.exponent);
|
||||
try expect(math.isPositiveInf(result.significand));
|
||||
|
||||
|
|
@ -211,16 +224,6 @@ test frexp {
|
|||
try expectEqual(0, result.exponent);
|
||||
try expect(math.isNegativeInf(result.significand));
|
||||
|
||||
// zero -> {zero, zero} (+)
|
||||
result = frexp(@as(T, 0.0));
|
||||
try expectEqual(0, result.exponent);
|
||||
try expect(math.isPositiveZero(result.significand));
|
||||
|
||||
// zero -> {zero, zero} (-)
|
||||
result = frexp(@as(T, -0.0));
|
||||
try expectEqual(0, result.exponent);
|
||||
try expect(math.isNegativeZero(result.significand));
|
||||
|
||||
// nan -> {nan, undefined}
|
||||
result = frexp(math.nan(T));
|
||||
try expect(math.isNan(result.significand));
|
||||
|
|
|
|||
|
|
@ -4,14 +4,14 @@ const expect = std.testing.expect;
|
|||
|
||||
/// Returns whether x is a finite value.
///
/// A comptime_float argument is converted to f128 before its bits are inspected.
pub fn isFinite(x: anytype) bool {
    const T = if (@TypeOf(x) == comptime_float) f128 else @TypeOf(x);
    const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
    // Mask that clears the sign bit so that only the magnitude is compared.
    const remove_sign = ~@as(TBits, 0) >> 1;
    const magnitude = @as(TBits, @bitCast(@as(T, x))) & remove_sign;
    // Finite values are exactly those whose magnitude encoding is below that of +inf.
    return magnitude < @as(TBits, @bitCast(math.inf(T)));
}
|
||||
|
||||
test isFinite {
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
|
||||
// normals
|
||||
try expect(isFinite(@as(T, 1.0)));
|
||||
try expect(isFinite(-@as(T, 1.0)));
|
||||
|
|
@ -25,6 +25,8 @@ test isFinite {
|
|||
try expect(isFinite(math.floatMin(T)));
|
||||
try expect(isFinite(math.floatMax(T)));
|
||||
|
||||
if (T == comptime_float) return;
|
||||
|
||||
// inf & nan
|
||||
try expect(!isFinite(math.inf(T)));
|
||||
try expect(!isFinite(-math.inf(T)));
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ const expect = std.testing.expect;
|
|||
|
||||
/// Returns whether x is a normal number, i.e. not zero, subnormal, infinity, or NaN.
|
||||
pub fn isNormal(x: anytype) bool {
|
||||
const T = @TypeOf(x);
|
||||
const T = if (@TypeOf(x) == comptime_float) f128 else @TypeOf(x);
|
||||
const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
|
||||
|
||||
const increment_exp = 1 << math.floatMantissaBits(T);
|
||||
|
|
@ -15,14 +15,14 @@ pub fn isNormal(x: anytype) bool {
|
|||
// The sign bit is removed because all ones would overflow into it.
|
||||
// For f80, even though it has an explicit integer part stored,
|
||||
// the exponent effectively takes priority if mismatching.
|
||||
const value = @as(TBits, @bitCast(x)) +% increment_exp;
|
||||
const value = @as(TBits, @bitCast(@as(T, x))) +% increment_exp;
|
||||
return value & remove_sign >= (increment_exp << 1);
|
||||
}
|
||||
|
||||
test isNormal {
|
||||
// TODO add `c_longdouble' when math.inf(T) supports it
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
|
||||
const TBits = std.meta.Int(.unsigned, @bitSizeOf(T));
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
|
||||
const TBits = if (T == comptime_float) u128 else std.meta.Int(.unsigned, @bitSizeOf(T));
|
||||
|
||||
// normals
|
||||
try expect(isNormal(@as(T, 1.0)));
|
||||
|
|
@ -35,7 +35,10 @@ test isNormal {
|
|||
try expect(!isNormal(@as(T, math.floatTrueMin(T))));
|
||||
|
||||
// largest subnormal
|
||||
try expect(!isNormal(@as(T, @bitCast(~(~@as(TBits, 0) << math.floatFractionalBits(T))))));
|
||||
const large_subnormal: if (T == comptime_float) f128 else T = @bitCast(~(~@as(TBits, 0) << math.floatFractionalBits(T)));
|
||||
try expect(!isNormal(@as(T, large_subnormal)));
|
||||
|
||||
if (T == comptime_float) return;
|
||||
|
||||
// non-finite numbers
|
||||
try expect(!isNormal(-math.inf(T)));
|
||||
|
|
|
|||
|
|
@ -5,36 +5,50 @@ const expect = std.testing.expect;
|
|||
/// Returns whether x is positive zero.
///
/// A comptime_float argument is inspected through its f128 encoding; any non-float
/// argument type is a compile error.
pub inline fn isPositiveZero(x: anytype) bool {
    const T = @TypeOf(x);
    // bit_count is the width of the encoding; F is the float type that gets bit-cast.
    const bit_count, const F = switch (@typeInfo(T)) {
        .float => |float| .{ float.bits, T },
        .comptime_float => .{ 128, f128 },
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
    const TBits = std.meta.Int(.unsigned, bit_count);
    // Positive zero is the all-zero bit pattern.
    return @as(TBits, @bitCast(@as(F, x))) == @as(TBits, 0);
}
|
||||
|
||||
/// Returns whether x is negative zero.
///
/// A comptime_float argument is inspected through its f128 encoding; any non-float
/// argument type is a compile error.
pub inline fn isNegativeZero(x: anytype) bool {
    const T = @TypeOf(x);
    // bit_count is the width of the encoding; F is the float type that gets bit-cast.
    const bit_count, const F = switch (@typeInfo(T)) {
        .float => |float| .{ float.bits, T },
        .comptime_float => .{ 128, f128 },
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
    const TBits = std.meta.Int(.unsigned, bit_count);
    // Negative zero has only the topmost bit of the encoding set.
    return @as(TBits, @bitCast(@as(F, x))) == @as(TBits, 1) << (bit_count - 1);
}
|
||||
|
||||
test isPositiveZero {
    inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
        try expect(isPositiveZero(@as(T, 0.0)));
        try expect(!isPositiveZero(@as(T, -0.0)));
        try expect(!isPositiveZero(math.floatMin(T)));
        try expect(!isPositiveZero(math.floatMax(T)));

        // The infinity checks are skipped for comptime_float. A comptime guard is used
        // instead of `return` so that types listed after comptime_float are still exercised.
        if (T != comptime_float) {
            try expect(!isPositiveZero(math.inf(T)));
            try expect(!isPositiveZero(-math.inf(T)));
        }
    }
}
|
||||
|
||||
test isNegativeZero {
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| {
|
||||
inline for ([_]type{ f16, f32, f64, f80, f128, comptime_float }) |T| {
|
||||
try expect(isNegativeZero(@as(T, -0.0)));
|
||||
try expect(!isNegativeZero(@as(T, 0.0)));
|
||||
try expect(!isNegativeZero(math.floatMin(T)));
|
||||
try expect(!isNegativeZero(math.floatMax(T)));
|
||||
|
||||
if (T == comptime_float) return;
|
||||
|
||||
try expect(!isNegativeZero(math.inf(T)));
|
||||
try expect(!isNegativeZero(-math.inf(T)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,11 @@ const expect = std.testing.expect;
|
|||
/// Returns x * 2^n.
|
||||
pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
|
||||
const T = @TypeOf(x);
|
||||
const TBits = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
|
||||
const TBits = switch (@typeInfo(T)) {
|
||||
.float => |float| std.meta.Int(.unsigned, float.bits),
|
||||
.comptime_float => u128,
|
||||
else => @compileError("unknown floating point type " ++ @typeName(T)),
|
||||
};
|
||||
|
||||
const exponent_bits = math.floatExponentBits(T);
|
||||
const mantissa_bits = math.floatMantissaBits(T);
|
||||
|
|
@ -16,11 +20,13 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
|
|||
const max_biased_exponent = 2 * math.floatExponentMax(T);
|
||||
const mantissa_mask = @as(TBits, (1 << mantissa_bits) - 1);
|
||||
|
||||
const repr = @as(TBits, @bitCast(x));
|
||||
const repr = bitCastAs(TBits, x);
|
||||
const sign_bit = repr & (1 << (exponent_bits + mantissa_bits));
|
||||
|
||||
if (math.isNan(x) or !math.isFinite(x))
|
||||
if (math.isNan(x) or !math.isFinite(x)) {
|
||||
if (T == comptime_float) unreachable;
|
||||
return x;
|
||||
}
|
||||
|
||||
var exponent: i32 = @as(i32, @intCast((repr << 1) >> (mantissa_bits + 1)));
|
||||
if (exponent == 0)
|
||||
|
|
@ -29,23 +35,23 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
|
|||
if (n >= 0) {
|
||||
if (n > max_biased_exponent - exponent) {
|
||||
// Overflow. Return +/- inf
|
||||
return @as(T, @bitCast(@as(TBits, @bitCast(math.inf(T))) | sign_bit));
|
||||
return bitCastAs(T, bitCastAs(TBits, math.inf(T)) | sign_bit);
|
||||
} else if (exponent + n <= 0) {
|
||||
// Result is subnormal
|
||||
return @as(T, @bitCast((repr << @as(Log2Int(TBits), @intCast(n))) | sign_bit));
|
||||
return bitCastAs(T, (repr << @as(Log2Int(TBits), @intCast(n))) | sign_bit);
|
||||
} else if (exponent <= 0) {
|
||||
// Result is normal, but needs shifting
|
||||
var result = @as(TBits, @intCast(n + exponent)) << mantissa_bits;
|
||||
result |= (repr << @as(Log2Int(TBits), @intCast(1 - exponent))) & mantissa_mask;
|
||||
return @as(T, @bitCast(result | sign_bit));
|
||||
return bitCastAs(T, result | sign_bit);
|
||||
}
|
||||
|
||||
// Result needs no shifting
|
||||
return @as(T, @bitCast(repr + (@as(TBits, @intCast(n)) << mantissa_bits)));
|
||||
return bitCastAs(T, repr + (@as(TBits, @intCast(n)) << mantissa_bits));
|
||||
} else {
|
||||
if (n <= -exponent) {
|
||||
if (n < -(mantissa_bits + exponent))
|
||||
return @as(T, @bitCast(sign_bit)); // Severe underflow. Return +/- 0
|
||||
return bitCastAs(T, sign_bit); // Severe underflow. Return +/- 0
|
||||
|
||||
// Result underflowed, we need to shift and round
|
||||
const shift = @as(Log2Int(TBits), @intCast(@min(-n, -(exponent + n) + 1)));
|
||||
|
|
@ -58,14 +64,22 @@ pub fn ldexp(x: anytype, n: i32) @TypeOf(x) {
|
|||
|
||||
// Round result, including round-to-even for exact ties
|
||||
result = ((result + 1) >> 1) & ~@as(TBits, @intFromBool(exact_tie));
|
||||
return @as(T, @bitCast(result | sign_bit));
|
||||
return bitCastAs(T, result | sign_bit);
|
||||
}
|
||||
|
||||
// Result is exact, and needs no shifting
|
||||
return @as(T, @bitCast(repr - (@as(TBits, @intCast(-n)) << mantissa_bits)));
|
||||
return bitCastAs(T, repr - (@as(TBits, @intCast(-n)) << mantissa_bits));
|
||||
}
|
||||
}
|
||||
|
||||
/// Bit-casts `x` to `T`, substituting f128 for comptime_float on either side of the cast.
inline fn bitCastAs(comptime T: type, x: anytype) T {
    // A comptime_float input is first converted to f128.
    const source = switch (@TypeOf(x)) {
        comptime_float => @as(f128, x),
        else => x,
    };
    if (T == comptime_float) {
        // Cast the bits to f128, then let the return coerce that value to comptime_float.
        const widened: f128 = @bitCast(source);
        return widened;
    }
    return @bitCast(source);
}
|
||||
|
||||
test ldexp {
|
||||
// subnormals
|
||||
try expect(ldexp(@as(f16, 0x1.1FFp14), -14 - 9 - 15) == math.floatTrueMin(f16));
|
||||
|
|
@ -73,6 +87,7 @@ test ldexp {
|
|||
try expect(ldexp(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.floatTrueMin(f64));
|
||||
try expect(ldexp(@as(f80, 0x1.7FFFFFFFFFFFFFFEp-1), -16382 - 62) == math.floatTrueMin(f80));
|
||||
try expect(ldexp(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
|
||||
try expect(ldexp(@as(comptime_float, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128));
|
||||
|
||||
try expect(ldexp(math.floatMax(f32), -128 - 149) > 0.0);
|
||||
try expect(ldexp(math.floatMax(f32), -128 - 149 - 1) == 0.0);
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ test "int" {
|
|||
}
|
||||
|
||||
test "float" {
|
||||
@setEvalBranchQuota(4000);
|
||||
@setEvalBranchQuota(5000);
|
||||
|
||||
// normal -> normal
|
||||
try expect(nextAfter(f16, 0x1.234p0, 2.0) == 0x1.238p0);
|
||||
|
|
|
|||
|
|
@ -31,24 +31,20 @@ const expect = std.testing.expect;
|
|||
/// - pow(-inf, y) = pow(-0, -y)
|
||||
/// - pow(x, y) = nan for finite x < 0 and finite non-integer y
|
||||
pub fn pow(comptime T: type, x: T, y: T) T {
|
||||
if (@typeInfo(T) == .int) {
|
||||
const info = @typeInfo(T);
|
||||
if (info == .int or info == .comptime_int)
|
||||
return math.powi(T, x, y) catch unreachable;
|
||||
}
|
||||
|
||||
if (T != f32 and T != f64) {
|
||||
@compileError("pow not implemented for " ++ @typeName(T));
|
||||
}
|
||||
|
||||
// pow(x, +-0) = 1 for all x
|
||||
// pow(1, y) = 1 for all y
|
||||
if (y == 0 or x == 1) {
|
||||
if (y == 0 or x == 1)
|
||||
return 1;
|
||||
}
|
||||
|
||||
// pow(nan, y) = nan for all y
|
||||
// pow(x, nan) = nan for all x
|
||||
if (math.isNan(x) or math.isNan(y)) {
|
||||
@branchHint(.unlikely);
|
||||
if (info == .comptime_float) unreachable;
|
||||
return math.nan(T);
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +56,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
|
|||
if (x == 0) {
|
||||
if (y < 0) {
|
||||
// pow(+-0, y) = +-inf for y an odd integer
|
||||
if (isOddInteger(y)) {
|
||||
if (isOddInteger(T, y)) {
|
||||
return math.copysign(math.inf(T), x);
|
||||
}
|
||||
// pow(+-0, y) = +inf for y an even integer
|
||||
|
|
@ -68,7 +64,7 @@ pub fn pow(comptime T: type, x: T, y: T) T {
|
|||
return math.inf(T);
|
||||
}
|
||||
} else {
|
||||
if (isOddInteger(y)) {
|
||||
if (isOddInteger(T, y)) {
|
||||
return x;
|
||||
} else {
|
||||
return 0;
|
||||
|
|
@ -77,6 +73,9 @@ pub fn pow(comptime T: type, x: T, y: T) T {
|
|||
}
|
||||
|
||||
if (math.isInf(y)) {
|
||||
@branchHint(.unlikely);
|
||||
if (info == .comptime_float) unreachable;
|
||||
|
||||
// pow(-1, +-inf) = 1
|
||||
if (x == -1) {
|
||||
return 1.0;
|
||||
|
|
@ -94,6 +93,9 @@ pub fn pow(comptime T: type, x: T, y: T) T {
|
|||
}
|
||||
|
||||
if (math.isInf(x)) {
|
||||
@branchHint(.unlikely);
|
||||
if (info == .comptime_float) unreachable;
|
||||
|
||||
if (math.isNegativeInf(x)) {
|
||||
return pow(T, 1 / x, -y);
|
||||
}
|
||||
|
|
@ -145,7 +147,12 @@ pub fn pow(comptime T: type, x: T, y: T) T {
|
|||
var xe = r2.exponent;
|
||||
var x1 = r2.significand;
|
||||
|
||||
var i = @as(std.meta.Int(.signed, @typeInfo(T).float.bits), @intFromFloat(yi));
|
||||
const Int = switch (info) {
|
||||
.float => |float| std.meta.Int(.signed, float.bits),
|
||||
.comptime_float => i128,
|
||||
else => @compileError("pow not implemented for " ++ @typeName(T)),
|
||||
};
|
||||
var i = @as(Int, @intFromFloat(yi));
|
||||
while (i != 0) : (i >>= 1) {
|
||||
const overflow_shift = math.floatExponentBits(T) + 1;
|
||||
if (xe < -(1 << overflow_shift) or (1 << overflow_shift) < xe) {
|
||||
|
|
@ -178,25 +185,37 @@ pub fn pow(comptime T: type, x: T, y: T) T {
|
|||
return math.scalbn(a1, ae);
|
||||
}
|
||||
|
||||
/// Reports whether `x` holds an odd integer value.
///
/// Returns `false` when `x` has a fractional part, when `x` is an even
/// integer, and when `@abs(x) >= 1 << digits`, where `digits` is the
/// significand width of `T` (fractional bits + 1). Values at or beyond that
/// bound are treated as even, and rejecting them early keeps the
/// `@intFromFloat` conversion below within range of `Int`.
///
/// NOTE(review): `comptime_float` is mapped to `i128` here; this assumes
/// `math.floatFractionalBits` and `math.modf` accept `comptime_float` - confirm.
fn isOddInteger(comptime T: type, x: T) bool {
    // Standard IEEE floats have an implicit 0.m or 1.m integer part,
    // so the number of significand digits is the number of fractional bits + 1.
    const digits = math.floatFractionalBits(T) + 1;
    // The bound is the power of two `1 << digits`, not the bit count itself:
    // comparing against `digits` would report every odd integer >= `digits`
    // (e.g. 55.0 for f64) as even.
    if (@abs(x) >= (1 << digits)) {
        // From https://golang.org/src/math/pow.go
        // 1 << digits is the largest exact integer in the IEEE float format fN.
        // Any number outside this range will be truncated before the decimal point and therefore will always be
        // an even integer.
        // Without this check and if x overflows iN the @intFromFloat(r.ipart) conversion below will panic
        return false;
    }
    const r = math.modf(x);

    // Signed integer as wide as the float, so every value below the bound fits.
    const Int = switch (@typeInfo(T)) {
        .float => |float| std.meta.Int(.signed, float.bits),
        .comptime_float => i128,
        else => @compileError("isOddInteger not implemented for " ++ @typeName(T)),
    };
    const ipart: Int = @intFromFloat(r.ipart);
    return r.fpart == 0.0 and ipart & 1 == 1;
}

test isOddInteger {
    try expect(isOddInteger(f128, @floatFromInt(math.maxInt(i64) * 2)) == false);
    // 2^64 - 1 is not representable in f64; it rounds to the even value 2^64.
    try expect(isOddInteger(f64, @floatFromInt(math.maxInt(i64) * 2 + 1)) == false);
    // Boundary of the exact-integer range.
    try expect(isOddInteger(f64, 1 << 53) == false);
    try expect(isOddInteger(f64, 9007199254740991.0) == true); // 2^53 - 1
    try expect(isOddInteger(f32, 1 << 24) == false);
    try expect(isOddInteger(f32, 16777215.0) == true); // 2^24 - 1
    // Odd integers larger than the significand bit count must still be odd.
    try expect(isOddInteger(f64, 55.0) == true);
    try expect(isOddInteger(f32, 25.0) == true);
    try expect(isOddInteger(f80, 12.0) == false);
    try expect(isOddInteger(f80, 15.0) == true);
    try expect(isOddInteger(f32, 5.0) == true);
    try expect(isOddInteger(f16, -1.0) == true);
    // Non-integral values are never odd integers.
    try expect(isOddInteger(f64, 2.5) == false);
}
|
||||
|
||||
test pow {
|
||||
|
|
|
|||
|
|
@ -8,30 +8,42 @@ const math = std.math;
|
|||
const assert = std.debug.assert;
|
||||
const testing = std.testing;
|
||||
|
||||
const UnsignedError = error{Overflow};
|
||||
const SignedError = error{
|
||||
Overflow,
|
||||
Underflow,
|
||||
DivisionByZero,
|
||||
};
|
||||
|
||||
/// Returns x raised to the integer power y (x^y).
|
||||
///
|
||||
/// Errors:
|
||||
/// - Overflow: Integer overflow or Infinity
|
||||
/// - Overflow: Integer overflow
|
||||
/// - Underflow: Absolute value of result smaller than 1
|
||||
/// - DivisionByZero: Undefined power.
|
||||
///
|
||||
/// Edge case rules ordered by precedence:
|
||||
/// - powi(T, x, 0) = 1 unless T is i1, i0, u0
|
||||
/// - powi(T, 0, x) = 0 when x > 0
|
||||
/// - powi(T, 0, x) = Overflow
|
||||
/// - powi(T, 0, x) = DivisionByZero
|
||||
/// - powi(T, 1, y) = 1
|
||||
/// - powi(T, -1, y) = -1 for y an odd integer
|
||||
/// - powi(T, -1, y) = 1 unless T is i1, i0, u0
|
||||
/// - powi(T, -1, y) = Overflow
|
||||
/// - powi(T, x, y) = Overflow when y >= @bitSizeOf(x)
|
||||
/// - powi(T, x, y) = Underflow when y < 0
|
||||
pub fn powi(comptime T: type, x: T, y: T) (error{
|
||||
Overflow,
|
||||
Underflow,
|
||||
}!T) {
|
||||
const bit_size = @typeInfo(T).int.bits;
|
||||
pub fn powi(comptime T: type, x: T, y: T) (if (@typeInfo(T) == .int and @typeInfo(T).int.signedness == .unsigned)
|
||||
UnsignedError
|
||||
else
|
||||
SignedError)!T {
|
||||
const info = @typeInfo(T);
|
||||
if (info != .int and info != .comptime_int)
|
||||
@compileError("powi not implemented for " ++ @typeName(T));
|
||||
|
||||
const is_unsigned = info == .int and info.int.signedness == .unsigned;
|
||||
|
||||
// `y & 1 == 0` won't compile when `does_one_overflow`.
|
||||
const does_one_overflow = math.maxInt(T) < 1;
|
||||
const does_one_overflow = info == .int and math.maxInt(T) < 1;
|
||||
const is_y_even = !does_one_overflow and y & 1 == 0;
|
||||
|
||||
if (x == 1 or y == 0 or (x == -1 and is_y_even)) {
|
||||
|
|
@ -50,15 +62,17 @@ pub fn powi(comptime T: type, x: T, y: T) (error{
|
|||
if (y > 0) {
|
||||
return 0;
|
||||
} else {
|
||||
// Infinity/NaN, not overflow in strict sense
|
||||
return error.Overflow;
|
||||
if (is_unsigned) unreachable;
|
||||
return error.DivisionByZero;
|
||||
}
|
||||
}
|
||||
|
||||
// x >= 2 or x <= -2 from this point
|
||||
if (y >= bit_size) {
|
||||
if (info == .int and y >= info.int.bits) {
|
||||
return error.Overflow;
|
||||
}
|
||||
if (y < 0) {
|
||||
if (is_unsigned) unreachable;
|
||||
return error.Underflow;
|
||||
}
|
||||
|
||||
|
|
@ -71,27 +85,32 @@ pub fn powi(comptime T: type, x: T, y: T) (error{
|
|||
|
||||
while (exp > 1) {
|
||||
if (exp & 1 == 1) {
|
||||
const ov = @mulWithOverflow(acc, base);
|
||||
if (ov[1] != 0) return error.Overflow;
|
||||
acc = ov[0];
|
||||
acc = try mul(T, acc, base);
|
||||
}
|
||||
|
||||
exp >>= 1;
|
||||
|
||||
const ov = @mulWithOverflow(base, base);
|
||||
if (ov[1] != 0) return error.Overflow;
|
||||
base = ov[0];
|
||||
base = try mul(T, base, base);
|
||||
}
|
||||
|
||||
if (exp == 1) {
|
||||
const ov = @mulWithOverflow(acc, base);
|
||||
if (ov[1] != 0) return error.Overflow;
|
||||
acc = ov[0];
|
||||
acc = try mul(T, acc, base);
|
||||
}
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
/// Multiplies `x` by `y`, dispatching on the kind of `T`.
///
/// - `.int`: uses `@mulWithOverflow` and returns `error.Overflow` when the
///   overflow bit is set, otherwise the product.
/// - `.comptime_int`: returns `x * y` directly; no overflow bit is consulted.
/// - any other type: `unreachable`.
inline fn mul(comptime T: type, x: T, y: T) error{Overflow}!T {
|
||||
return switch (@typeInfo(T)) {
|
||||
.int => {
|
||||
// Destructure the (product, overflow bit) pair returned by the builtin.
const prod, const overflow = @mulWithOverflow(x, y);
|
||||
return if (overflow != 0) error.Overflow else prod;
|
||||
},
|
||||
.comptime_int => x * y,
|
||||
else => unreachable,
|
||||
};
|
||||
}
|
||||
|
||||
test powi {
|
||||
try testing.expectError(error.Overflow, powi(i8, -66, 6));
|
||||
try testing.expectError(error.Overflow, powi(i16, -13, 13));
|
||||
|
|
@ -106,6 +125,8 @@ test powi {
|
|||
try testing.expect((try powi(i64, -36, 6)) == 2176782336);
|
||||
try testing.expect((try powi(i17, -2, 15)) == -32768);
|
||||
try testing.expect((try powi(i42, -5, 7)) == -78125);
|
||||
try testing.expect((try powi(comptime_int, -12345, 11)) == -1014850422703912515858714960329315071728515625);
|
||||
try comptime testing.expect((try powi(comptime_int, 13, 5)) == 371293);
|
||||
|
||||
try testing.expect((try powi(u8, 6, 2)) == 36);
|
||||
try testing.expect((try powi(u16, 5, 4)) == 625);
|
||||
|
|
@ -113,6 +134,8 @@ test powi {
|
|||
try testing.expect((try powi(u64, 34, 2)) == 1156);
|
||||
try testing.expect((try powi(u17, 16, 3)) == 4096);
|
||||
try testing.expect((try powi(u42, 34, 6)) == 1544804416);
|
||||
try testing.expect((try powi(comptime_int, 54321, 9)) == 4118222497610732111054528594901610509007281);
|
||||
try comptime testing.expect((try powi(comptime_int, 51, 3)) == 132651);
|
||||
|
||||
try testing.expectError(error.Overflow, powi(i8, 120, 7));
|
||||
try testing.expectError(error.Overflow, powi(i16, 73, 15));
|
||||
|
|
@ -157,6 +180,8 @@ test "powi.special" {
|
|||
try testing.expect((try powi(i64, -1, 6)) == 1);
|
||||
try testing.expect((try powi(i17, -1, 15)) == -1);
|
||||
try testing.expect((try powi(i42, -1, 7)) == -1);
|
||||
try testing.expect((try powi(comptime_int, -1, 5)) == -1);
|
||||
try comptime testing.expect((try powi(comptime_int, -1, 3)) == -1);
|
||||
|
||||
try testing.expect((try powi(u8, 1, 2)) == 1);
|
||||
try testing.expect((try powi(u16, 1, 4)) == 1);
|
||||
|
|
@ -185,6 +210,8 @@ test "powi.special" {
|
|||
try testing.expect((try powi(u64, 34, 0)) == 1);
|
||||
try testing.expect((try powi(u17, 16, 0)) == 1);
|
||||
try testing.expect((try powi(u42, 34, 0)) == 1);
|
||||
try testing.expect((try powi(comptime_int, 41, 0)) == 1);
|
||||
try comptime testing.expect((try powi(comptime_int, 43, 0)) == 1);
|
||||
}
|
||||
|
||||
test "powi.narrow" {
|
||||
|
|
@ -192,6 +219,6 @@ test "powi.narrow" {
|
|||
try testing.expectError(error.Overflow, powi(i0, 0, 0));
|
||||
try testing.expectError(error.Overflow, powi(i1, 0, 0));
|
||||
try testing.expectError(error.Overflow, powi(i1, -1, 0));
|
||||
try testing.expectError(error.Overflow, powi(i1, 0, -1));
|
||||
try testing.expectError(error.DivisionByZero, powi(i1, 0, -1));
|
||||
try testing.expect((try powi(i1, -1, -1)) == -1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,9 @@ fn testFloats(comptime Type: type) !void {
|
|||
try expect(!signbit(@as(Type, 1.0)));
|
||||
try expect(signbit(@as(Type, -2.0)));
|
||||
try expect(signbit(@as(Type, -0.0)));
|
||||
|
||||
if (Type == comptime_float) return;
|
||||
|
||||
try expect(!signbit(math.inf(Type)));
|
||||
try expect(signbit(-math.inf(Type)));
|
||||
try expect(!signbit(math.nan(Type)));
|
||||
|
|
|
|||
|
|
@ -290,19 +290,16 @@ pub inline fn expectApproxEqAbs(expected: anytype, actual: anytype, tolerance: a
|
|||
|
||||
/// Checks `actual` against `expected` with `math.approxEqAbs(T, expected, actual, tolerance)`.
///
/// When that comparison is false, prints the actual value, the tolerance and
/// the expected value, then returns `error.TestExpectedApproxEqAbs`.
/// Types that reach the `else` arm are rejected with a compile error.
fn expectApproxEqAbsInner(comptime T: type, expected: T, actual: T, tolerance: T) !void {
|
||||
switch (@typeInfo(T)) {
|
||||
.float => if (!math.approxEqAbs(T, expected, actual, tolerance)) {
|
||||
.float, .comptime_float => if (!math.approxEqAbs(T, expected, actual, tolerance)) {
|
||||
print("actual {}, not within absolute tolerance {} of expected {}\n", .{ actual, tolerance, expected });
|
||||
return error.TestExpectedApproxEqAbs;
|
||||
},
|
||||
|
||||
.comptime_float => @compileError("Cannot approximately compare two comptime_float values"),
|
||||
|
||||
else => @compileError("Unable to compare non floating point values"),
|
||||
}
|
||||
}
|
||||
|
||||
test expectApproxEqAbs {
|
||||
inline for ([_]type{ f16, f32, f64, f128 }) |T| {
|
||||
inline for ([_]type{ f16, f32, f64, f128, comptime_float }) |T| {
|
||||
const pos_x: T = 12.0;
|
||||
const pos_y: T = 12.06;
|
||||
const neg_x: T = -12.0;
|
||||
|
|
@ -326,19 +323,16 @@ pub inline fn expectApproxEqRel(expected: anytype, actual: anytype, tolerance: a
|
|||
|
||||
/// Checks `actual` against `expected` with `math.approxEqRel(T, expected, actual, tolerance)`.
///
/// When that comparison is false, prints the actual value, the tolerance and
/// the expected value, then returns `error.TestExpectedApproxEqRel`.
/// Types that reach the `else` arm are rejected with a compile error.
fn expectApproxEqRelInner(comptime T: type, expected: T, actual: T, tolerance: T) !void {
|
||||
switch (@typeInfo(T)) {
|
||||
.float => if (!math.approxEqRel(T, expected, actual, tolerance)) {
|
||||
.float, .comptime_float => if (!math.approxEqRel(T, expected, actual, tolerance)) {
|
||||
print("actual {}, not within relative tolerance {} of expected {}\n", .{ actual, tolerance, expected });
|
||||
return error.TestExpectedApproxEqRel;
|
||||
},
|
||||
|
||||
.comptime_float => @compileError("Cannot approximately compare two comptime_float values"),
|
||||
|
||||
else => @compileError("Unable to compare non floating point values"),
|
||||
}
|
||||
}
|
||||
|
||||
test expectApproxEqRel {
|
||||
inline for ([_]type{ f16, f32, f64, f128 }) |T| {
|
||||
inline for ([_]type{ f16, f32, f64, f128, comptime_float }) |T| {
|
||||
const eps_value = comptime math.floatEps(T);
|
||||
const sqrt_eps_value = comptime @sqrt(eps_value);
|
||||
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
|
|||
try testArgs(u1025, 0x1dea81169800bac2f3afcf3be5dbd2d8eefbace8a24a2da0a383a928d1109459f34028be4413119f1af00ad90ce4d63064016dc1cee5b783c79c1998a0a49de21c4db71d432273576503589fc966c7ec2d730fa9bc4c5ff3128a82653ab8149528de67804718e39722f89b91c75d012ea41c642c889f0db95c882a9790a5e922f, 0x156fe02946ab9069a644dcc1f2b1afa04ee88ab1de19575a2715abf4a52bf374d297fdf78455ccdb87a934d3d818d774b63865eaedfdad3c56a56b8fcc62703c391aedf16cf770af06d7d205f93778c012df54fe5290084e1cd2bbec86a2f295cdce69a2cd774e064580f3c9cfae60d17b12f610e86566e68d5183d706c8ad8af);
|
||||
}
|
||||
fn testFloats() !void {
|
||||
@setEvalBranchQuota(21_700);
|
||||
@setEvalBranchQuota(25_000);
|
||||
|
||||
try testArgs(f16, -nan(f16), -nan(f16));
|
||||
try testArgs(f16, -nan(f16), -inf(f16));
|
||||
|
|
@ -4646,7 +4646,7 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
|
|||
});
|
||||
}
|
||||
fn testFloatVectors() !void {
|
||||
@setEvalBranchQuota(21_700);
|
||||
@setEvalBranchQuota(25_000);
|
||||
|
||||
try testArgs(@Vector(1, f16), .{
|
||||
-tmin(f16),
|
||||
|
|
|
|||
|
|
@ -3133,7 +3133,7 @@ fn cast(comptime op: anytype, comptime opts: struct { compare: Compare = .relaxe
|
|||
try testArgs(i1024, u1025, 1 << 1024);
|
||||
}
|
||||
fn testFloats() !void {
|
||||
@setEvalBranchQuota(3_100);
|
||||
@setEvalBranchQuota(3_500);
|
||||
|
||||
try testArgs(f16, f16, -nan(f16));
|
||||
try testArgs(f16, f16, -inf(f16));
|
||||
|
|
@ -6387,7 +6387,7 @@ fn cast(comptime op: anytype, comptime opts: struct { compare: Compare = .relaxe
|
|||
try testArgs(@Vector(3, i1024), @Vector(3, u1025), .{ 0, 1, 1 << 1024 });
|
||||
}
|
||||
fn testFloatVectors() !void {
|
||||
@setEvalBranchQuota(6_700);
|
||||
@setEvalBranchQuota(7_500);
|
||||
|
||||
try testArgs(@Vector(1, f16), @Vector(1, f16), .{
|
||||
1e0,
|
||||
|
|
@ -6890,7 +6890,7 @@ fn cast(comptime op: anytype, comptime opts: struct { compare: Compare = .relaxe
|
|||
});
|
||||
}
|
||||
fn testIntsFromFloats() !void {
|
||||
@setEvalBranchQuota(2_600);
|
||||
@setEvalBranchQuota(2_700);
|
||||
|
||||
try testArgs(i8, f16, -0x0.8p8);
|
||||
try testArgs(i8, f16, next(f16, -0x0.8p8, -0.0));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue