mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
improve impl of __sqrth, sqrtf, sqrt, __sqrtx and sqrtq (#25416)
The previous version (ported from musl) used bit-by-bit calculations and was slow, but the current version (also ported from musl) uses lookup tables combined with Goldschmidt iterations to significantly improve the speed.
This commit is contained in:
parent
851ae9bb43
commit
9bfd4c3017
1 changed files with 679 additions and 247 deletions
|
|
@ -1,3 +1,10 @@
|
|||
//! Ported from musl, which is MIT licensed.
|
||||
//! https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
|
||||
//!
|
||||
//! https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
|
||||
//! https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
|
||||
//! https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
|
||||
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const arch = builtin.cpu.arch;
|
||||
|
|
@ -21,227 +28,470 @@ comptime {
|
|||
}
|
||||
|
||||
pub fn __sqrth(x: f16) callconv(.c) f16 {
|
||||
// TODO: more efficient implementation
|
||||
return @floatCast(sqrtf(x));
|
||||
var ix: u16 = @bitCast(x);
|
||||
var top = ix >> 10;
|
||||
|
||||
// special case handling.
|
||||
if (top -% 0x01 >= 0x1F - 0x01) {
|
||||
@branchHint(.unlikely);
|
||||
// x < 0x1p-14 or inf or nan.
|
||||
if (ix & 0x7FFF == 0) return x;
|
||||
if (ix == 0x7C00) return x;
|
||||
if (ix > 0x7C00) return math.nan(f16);
|
||||
// x is subnormal, normalize it.
|
||||
ix = @bitCast(x * 0x1p10);
|
||||
top = (ix >> 10) -% 10;
|
||||
}
|
||||
|
||||
// argument reduction:
|
||||
// x = 4^e m; with integer e, and m in [1, 4)
|
||||
// m: fixed point representation [2.14]
|
||||
// 2^e is the exponent part of the result.
|
||||
const even = (top & 1) != 0;
|
||||
const m = if (even) (ix << 4) & 0x7FFF else (ix << 5) | 0x8000;
|
||||
top = (top +% 0x0F) >> 1;
|
||||
|
||||
// approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||
// the fixed point representations are
|
||||
// m: 2.14 r: 0.16, s: 2.14, d: 2.14, u: 2.14, three: 2.14
|
||||
const three: u16 = 0xC000;
|
||||
const i: usize = @intCast((ix >> 4) & 0x7F);
|
||||
const r = __rsqrt_tab[i];
|
||||
// |r*sqrt(m) - 1| < 0x1p-8
|
||||
var s = mul16(m, r);
|
||||
// |s/sqrt(m) - 1| < 0x1p-8
|
||||
const d = mul16(s, r);
|
||||
const u = three - d;
|
||||
s = mul16(s, u); // repr: 3.13
|
||||
// -0x1.20p-13 < s/sqrt(m) - 1 < 0x7Dp-16
|
||||
s = (s - 1) >> 3; // repr: 6.10
|
||||
// s < sqrt(m) < s + 0x1.24p-10
|
||||
|
||||
// compute nearest rounded result:
|
||||
// the nearest result to 10 bits is either s or s+0x1p-10,
|
||||
// we can decide by comparing (2^10 s + 0.5)^2 to 2^20 m.
|
||||
const d0 = (m << 6) -% s *% s;
|
||||
const d1 = s -% d0;
|
||||
const d2 = d1 +% s +% 1;
|
||||
s += d1 >> 15;
|
||||
s &= 0x03FF;
|
||||
s |= top << 10;
|
||||
const y: f16 = @bitCast(s);
|
||||
|
||||
// handle rounding modes and inexact exception:
|
||||
// only (s+1)^2 == 2^6 m case is exact otherwise
|
||||
// add a tiny value to cause the fenv effects.
|
||||
if (d2 != 0) {
|
||||
@branchHint(.likely);
|
||||
var tiny: u16 = 0x0001;
|
||||
tiny |= (d1 ^ d2) & 0x8000;
|
||||
const t: f16 = @bitCast(tiny);
|
||||
return y + t;
|
||||
}
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
pub fn sqrtf(x: f32) callconv(.c) f32 {
|
||||
const tiny: f32 = 1.0e-30;
|
||||
const sign: i32 = @bitCast(@as(u32, 0x80000000));
|
||||
var ix: i32 = @bitCast(x);
|
||||
var ix: u32 = @bitCast(x);
|
||||
var top = ix >> 23;
|
||||
|
||||
if ((ix & 0x7F800000) == 0x7F800000) {
|
||||
return x * x + x; // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
|
||||
// special case handling.
|
||||
if (top -% 0x01 >= 0xFF - 0x01) {
|
||||
@branchHint(.unlikely);
|
||||
// x < 0x1p-126 or inf or nan.
|
||||
if (ix & 0x7FFF_FFFF == 0) return x;
|
||||
if (ix == 0x7F80_0000) return x;
|
||||
if (ix > 0x7F80_0000) return math.nan(f32);
|
||||
// x is subnormal, normalize it.
|
||||
ix = @bitCast(x * 0x1p23);
|
||||
top = (ix >> 23) -% 23;
|
||||
}
|
||||
|
||||
// zero
|
||||
if (ix <= 0) {
|
||||
if (ix & ~sign == 0) {
|
||||
return x; // sqrt (+-0) = +-0
|
||||
}
|
||||
if (ix < 0) {
|
||||
return math.nan(f32);
|
||||
}
|
||||
// argument reduction:
|
||||
// x = 4^e m; with integer e, and m in [1, 4)
|
||||
// m: fixed point representation [2.30]
|
||||
// 2^e is the exponent part of the result.
|
||||
const even = (top & 1) != 0;
|
||||
const m = if (even) (ix << 7) & 0x7FFF_FFFF else (ix << 8) | 0x8000_0000;
|
||||
top = (top +% 0x7F) >> 1;
|
||||
|
||||
// approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||
// the fixed point representations are
|
||||
// m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
|
||||
const three: u32 = 0xC000_0000;
|
||||
var i: usize = @intCast((ix >> 17) & 0x3F);
|
||||
if (even) i += 64;
|
||||
var r = @as(u32, @intCast(__rsqrt_tab[i])) << 16;
|
||||
// |r*sqrt(m) - 1| < 0x1p-8
|
||||
var s = mul32(m, r);
|
||||
// |s/sqrt(m) - 1| < 0x1p-8
|
||||
var d = mul32(s, r);
|
||||
var u = three - d;
|
||||
r = mul32(r, u) << 1;
|
||||
// |r*sqrt(m) - 1| < 0x1.7bp-16
|
||||
s = mul32(s, u) << 1;
|
||||
// |s/sqrt(m) - 1| < 0x1.7bp-16
|
||||
d = mul32(s, r);
|
||||
u = three - d;
|
||||
s = mul32(s, u); // repr: 3.29
|
||||
// -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31
|
||||
s = (s - 1) >> 6; // repr: 9.23
|
||||
// s < sqrt(m) < s + 0x1.08p-23
|
||||
|
||||
// compute nearest rounded result:
|
||||
// the nearest result to 23 bits is either s or s+0x1p-23,
|
||||
// we can decide by comparing (2^23 s + 0.5)^2 to 2^46 m.
|
||||
const d0 = (m << 16) -% s *% s;
|
||||
const d1 = s -% d0;
|
||||
const d2 = d1 +% s +% 1;
|
||||
s += d1 >> 31;
|
||||
s &= 0x007F_FFFF;
|
||||
s |= top << 23;
|
||||
const y: f32 = @bitCast(s);
|
||||
|
||||
// handle rounding modes and inexact exception:
|
||||
// only (s+1)^2 == 2^16 m case is exact otherwise
|
||||
// add a tiny value to cause the fenv effects.
|
||||
if (d2 != 0) {
|
||||
@branchHint(.likely);
|
||||
var tiny: u32 = 0x0100_0000;
|
||||
tiny |= (d1 ^ d2) & 0x8000_0000;
|
||||
const t: f32 = @bitCast(tiny);
|
||||
return y + t;
|
||||
}
|
||||
|
||||
// normalize
|
||||
var m = ix >> 23;
|
||||
if (m == 0) {
|
||||
// subnormal
|
||||
var i: i32 = 0;
|
||||
while (ix & 0x00800000 == 0) : (i += 1) {
|
||||
ix <<= 1;
|
||||
}
|
||||
m -= i - 1;
|
||||
return y;
|
||||
}
|
||||
|
||||
m -= 127; // unbias exponent
|
||||
ix = (ix & 0x007FFFFF) | 0x00800000;
|
||||
|
||||
if (m & 1 != 0) { // odd m, double x to even
|
||||
ix += ix;
|
||||
}
|
||||
|
||||
m >>= 1; // m = [m / 2]
|
||||
|
||||
// sqrt(x) bit by bit
|
||||
ix += ix;
|
||||
var q: i32 = 0; // q = sqrt(x)
|
||||
var s: i32 = 0;
|
||||
var r: i32 = 0x01000000; // r = moving bit right -> left
|
||||
|
||||
while (r != 0) {
|
||||
const t = s + r;
|
||||
if (t <= ix) {
|
||||
s = t + r;
|
||||
ix -= t;
|
||||
q += r;
|
||||
}
|
||||
ix += ix;
|
||||
r >>= 1;
|
||||
}
|
||||
|
||||
// floating add to find rounding direction
|
||||
if (ix != 0) {
|
||||
var z = 1.0 - tiny; // inexact
|
||||
if (z >= 1.0) {
|
||||
z = 1.0 + tiny;
|
||||
if (z > 1.0) {
|
||||
q += 2;
|
||||
} else {
|
||||
if (q & 1 != 0) {
|
||||
q += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ix = (q >> 1) + 0x3f000000;
|
||||
ix += m << 23;
|
||||
return @bitCast(ix);
|
||||
}
|
||||
|
||||
/// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
|
||||
/// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
|
||||
/// potentially some edge cases remaining that are not handled in the same way.
|
||||
pub fn sqrt(x: f64) callconv(.c) f64 {
|
||||
const tiny: f64 = 1.0e-300;
|
||||
const sign: u32 = 0x80000000;
|
||||
const u: u64 = @bitCast(x);
|
||||
var ix: u64 = @bitCast(x);
|
||||
var top = ix >> 52;
|
||||
|
||||
var ix0: u32 = @intCast(u >> 32);
|
||||
var ix1: u32 = @intCast(u & 0xFFFFFFFF);
|
||||
|
||||
// sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
|
||||
if (ix0 & 0x7FF00000 == 0x7FF00000) {
|
||||
return x * x + x;
|
||||
// special case handling.
|
||||
if (top -% 0x001 >= 0x7FF - 0x001) {
|
||||
@branchHint(.unlikely);
|
||||
// x < 0x1p-1022 or inf or nan.
|
||||
if (ix & 0x7FFF_FFFF_FFFF_FFFF == 0) return x;
|
||||
if (ix == 0x7FF0_0000_0000_0000) return x;
|
||||
if (ix > 0x7FF0_0000_0000_0000) return math.nan(f64);
|
||||
// x is subnormal, normalize it.
|
||||
ix = @bitCast(x * 0x1p52);
|
||||
top = (ix >> 52) -% 52;
|
||||
}
|
||||
|
||||
// sqrt(+-0) = +-0
|
||||
if (x == 0.0) {
|
||||
return x;
|
||||
}
|
||||
// sqrt(-ve) = nan
|
||||
if (ix0 & sign != 0) {
|
||||
return math.nan(f64);
|
||||
// argument reduction:
|
||||
// x = 4^e m; with integer e, and m in [1, 4)
|
||||
// m: fixed point representation [2.62]
|
||||
// 2^e is the exponent part of the result.
|
||||
const even = (top & 1) != 0;
|
||||
const m = if (even) (ix << 10) & 0x7FFF_FFFF_FFFF_FFFF else (ix << 11) | 0x8000_0000_0000_0000;
|
||||
top = (top +% 0x3FF) >> 1;
|
||||
|
||||
// approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||
//
|
||||
// initial estimate:
|
||||
// 7bit table lookup (1bit exponent and 6bit significand).
|
||||
//
|
||||
// iterative approximation:
|
||||
// using 2 goldschmidt iterations with 32bit int arithmetics
|
||||
// and a final iteration with 64bit int arithmetics.
|
||||
//
|
||||
// details:
|
||||
//
|
||||
// the relative error (e = r0 sqrt(m)-1) of a linear estimate
|
||||
// (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
|
||||
// a table lookup is faster and needs one less iteration
|
||||
// 6 bit lookup table (128b) gives |e| < 0x1.f9p-8
|
||||
// 7 bit lookup table (256b) gives |e| < 0x1.fdp-9
|
||||
// for single and double prec 6bit is enough but for quad
|
||||
// prec 7bit is needed (or modified iterations). to avoid
|
||||
// one more iteration >=13bit table would be needed (16k).
|
||||
//
|
||||
// a newton-raphson iteration for r is
|
||||
// w = r*r
|
||||
// u = 3 - m*w
|
||||
// r = r*u/2
|
||||
// can use a goldschmidt iteration for s at the end or
|
||||
// s = m*r
|
||||
//
|
||||
// first goldschmidt iteration is
|
||||
// s = m*r
|
||||
// u = 3 - s*r
|
||||
// r = r*u/2
|
||||
// s = s*u/2
|
||||
// next goldschmidt iteration is
|
||||
// u = 3 - s*r
|
||||
// r = r*u/2
|
||||
// s = s*u/2
|
||||
// and at the end r is not computed only s.
|
||||
//
|
||||
// they use the same amount of operations and converge at the
|
||||
// same quadratic rate, i.e. if
|
||||
// r1 sqrt(m) - 1 = e, then
|
||||
// r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
|
||||
// the advantage of goldschmidt is that the mul for s and r
|
||||
// are independent (computed in parallel), however it is not
|
||||
// "self synchronizing": it only uses the input m in the
|
||||
// first iteration so rounding errors accumulate. at the end
|
||||
// or when switching to larger precision arithmetics rounding
|
||||
// errors dominate so the first iteration should be used.
|
||||
//
|
||||
// the fixed point representations are
|
||||
// m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
|
||||
// and after switching to 64 bit
|
||||
// m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62
|
||||
const three: struct { u32, u64 } = .{
|
||||
0xC000_0000,
|
||||
0xC000_0000_0000_0000,
|
||||
};
|
||||
var r: struct { u32, u64 } = undefined;
|
||||
var s: struct { u32, u64 } = undefined;
|
||||
var d: struct { u32, u64 } = undefined;
|
||||
var u: struct { u32, u64 } = undefined;
|
||||
const i: usize = @intCast((ix >> 46) & 0x7F);
|
||||
r[0] = @intCast(__rsqrt_tab[i]);
|
||||
r[0] <<= 16;
|
||||
// |r sqrt(m) - 1| < 0x1.fdp-9
|
||||
s[0] = mul32(@intCast(m >> 32), r[0]);
|
||||
// |s/sqrt(m) - 1| < 0x1.fdp-9
|
||||
d[0] = mul32(s[0], r[0]);
|
||||
u[0] = three[0] - d[0];
|
||||
r[0] = mul32(r[0], u[0]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.7bp-16
|
||||
s[0] = mul32(s[0], u[0]) << 1;
|
||||
// |s/sqrt(m) - 1| < 0x1.7bp-16
|
||||
d[0] = mul32(s[0], r[0]);
|
||||
u[0] = three[0] - d[0];
|
||||
r[0] = mul32(r[0], u[0]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case)
|
||||
r[1] = @intCast(r[0]);
|
||||
r[1] <<= 32;
|
||||
s[1] = mul64(m, r[1]);
|
||||
d[1] = mul64(s[1], r[1]);
|
||||
u[1] = three[1] - d[1];
|
||||
s[1] = mul64(s[1], u[1]); // repr: 3.61
|
||||
// -0x1p-57 < s - sqrt(m) < 0x1.8001p-61
|
||||
s[1] = (s[1] - 2) >> 9; // repr: 12.52
|
||||
// -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63
|
||||
|
||||
// s < sqrt(m) < s + 0x1.09p-52
|
||||
// compute nearest rounded result:
|
||||
// the nearest result to 52 bits is either s or s+0x1p-52,
|
||||
// we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.
|
||||
const d0 = (m << 42) -% s[1] *% s[1];
|
||||
const d1 = s[1] -% d0;
|
||||
const d2 = d1 +% s[1] +% 1;
|
||||
s[1] += d1 >> 63;
|
||||
s[1] &= 0x000F_FFFF_FFFF_FFFF;
|
||||
s[1] |= top << 52;
|
||||
const y: f64 = @bitCast(s[1]);
|
||||
|
||||
// handle rounding modes and inexact exception:
|
||||
// only (s+1)^2 == 2^42 m case is exact otherwise
|
||||
// add a tiny value to cause the fenv effects.
|
||||
if (d2 != 0) {
|
||||
@branchHint(.likely);
|
||||
var tiny: u64 = 0x0010_0000_0000_0000;
|
||||
tiny |= (d1 ^ d2) & 0x8000_0000_0000_0000;
|
||||
const t: f64 = @bitCast(tiny);
|
||||
return y + t;
|
||||
}
|
||||
|
||||
// normalize x
|
||||
var m: i32 = @intCast(ix0 >> 20);
|
||||
if (m == 0) {
|
||||
// subnormal
|
||||
while (ix0 == 0) {
|
||||
m -= 21;
|
||||
ix0 |= ix1 >> 11;
|
||||
ix1 <<= 21;
|
||||
}
|
||||
|
||||
// subnormal
|
||||
var i: u32 = 0;
|
||||
while (ix0 & 0x00100000 == 0) : (i += 1) {
|
||||
ix0 <<= 1;
|
||||
}
|
||||
m -= @as(i32, @intCast(i)) - 1;
|
||||
ix0 |= ix1 >> @intCast(32 - i);
|
||||
ix1 <<= @intCast(i);
|
||||
}
|
||||
|
||||
// unbias exponent
|
||||
m -= 1023;
|
||||
ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
|
||||
if (m & 1 != 0) {
|
||||
ix0 += ix0 + (ix1 >> 31);
|
||||
ix1 = ix1 +% ix1;
|
||||
}
|
||||
m >>= 1;
|
||||
|
||||
// sqrt(x) bit by bit
|
||||
ix0 += ix0 + (ix1 >> 31);
|
||||
ix1 = ix1 +% ix1;
|
||||
|
||||
var q: u32 = 0;
|
||||
var q1: u32 = 0;
|
||||
var s0: u32 = 0;
|
||||
var s1: u32 = 0;
|
||||
var r: u32 = 0x00200000;
|
||||
var t: u32 = undefined;
|
||||
var t1: u32 = undefined;
|
||||
|
||||
while (r != 0) {
|
||||
t = s0 +% r;
|
||||
if (t <= ix0) {
|
||||
s0 = t + r;
|
||||
ix0 -= t;
|
||||
q += r;
|
||||
}
|
||||
ix0 = ix0 +% ix0 +% (ix1 >> 31);
|
||||
ix1 = ix1 +% ix1;
|
||||
r >>= 1;
|
||||
}
|
||||
|
||||
r = sign;
|
||||
while (r != 0) {
|
||||
t1 = s1 +% r;
|
||||
t = s0;
|
||||
if (t < ix0 or (t == ix0 and t1 <= ix1)) {
|
||||
s1 = t1 +% r;
|
||||
if (t1 & sign == sign and s1 & sign == 0) {
|
||||
s0 += 1;
|
||||
}
|
||||
ix0 -= t;
|
||||
if (ix1 < t1) {
|
||||
ix0 -= 1;
|
||||
}
|
||||
ix1 = ix1 -% t1;
|
||||
q1 += r;
|
||||
}
|
||||
ix0 = ix0 +% ix0 +% (ix1 >> 31);
|
||||
ix1 = ix1 +% ix1;
|
||||
r >>= 1;
|
||||
}
|
||||
|
||||
// rounding direction
|
||||
if (ix0 | ix1 != 0) {
|
||||
var z = 1.0 - tiny; // raise inexact
|
||||
if (z >= 1.0) {
|
||||
z = 1.0 + tiny;
|
||||
if (q1 == 0xFFFFFFFF) {
|
||||
q1 = 0;
|
||||
q += 1;
|
||||
} else if (z > 1.0) {
|
||||
if (q1 == 0xFFFFFFFE) {
|
||||
q += 1;
|
||||
}
|
||||
q1 += 2;
|
||||
} else {
|
||||
q1 += q1 & 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ix0 = (q >> 1) + 0x3FE00000;
|
||||
ix1 = q1 >> 1;
|
||||
if (q & 1 != 0) {
|
||||
ix1 |= 0x80000000;
|
||||
}
|
||||
|
||||
// NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
|
||||
// behaviour at least.
|
||||
var iix0: i32 = @intCast(ix0);
|
||||
iix0 = iix0 +% (m << 20);
|
||||
|
||||
const uz = (@as(u64, @intCast(iix0)) << 32) | ix1;
|
||||
return @bitCast(uz);
|
||||
return y;
|
||||
}
|
||||
|
||||
pub fn __sqrtx(x: f80) callconv(.c) f80 {
|
||||
// TODO: more efficient implementation
|
||||
return @floatCast(sqrtq(x));
|
||||
var ix: u80 = @bitCast(x);
|
||||
var top = ix >> 64;
|
||||
|
||||
// special case handling.
|
||||
if (top -% 0x0001 >= 0x7FFF - 0x0001) {
|
||||
@branchHint(.unlikely);
|
||||
// x < 0x1p-16382 or inf or nan.
|
||||
if (ix & 0x7FFF_FFFF_FFFF_FFFF_FFFF == 0) return x;
|
||||
if (ix == 0x7FFF_8000_0000_0000_0000) return x;
|
||||
if (ix > 0x7FFF_8000_0000_0000_0000) return math.nan(f80);
|
||||
// x is subnormal, normalize it.
|
||||
ix = @bitCast(x * 0x1p63);
|
||||
top = (ix >> 64) -% 63;
|
||||
}
|
||||
|
||||
// argument reduction:
|
||||
// x = 4^e m; with integer e, and m in [1, 4)
|
||||
// m: fixed point representation [2.78]
|
||||
// 2^e is the exponent part of the result.
|
||||
const even = (top & 1) != 0;
|
||||
const m = if (even) (ix << 15) & 0x7FFF_FFFF_FFFF_FFFF_FFFF else ix << 16;
|
||||
top = (top +% 0x3FFF) >> 1;
|
||||
|
||||
// approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||
// the fixed point representations are
|
||||
// m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
|
||||
// and after switching to 64 bit
|
||||
// m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62
|
||||
// and after switching to 80 bit
|
||||
// m: 2.78 r: 0.80, s: 2.78, d: 2.78, u: 2.78, three: 2.78
|
||||
const three: struct { u32, u64, u80 } = .{
|
||||
0xC000_0000,
|
||||
0xC000_0000_0000_0000,
|
||||
0xC000_0000_0000_0000_0000,
|
||||
};
|
||||
var r: struct { u32, u64, u80 } = undefined;
|
||||
var s: struct { u32, u64, u80 } = undefined;
|
||||
var d: struct { u32, u64, u80 } = undefined;
|
||||
var u: struct { u32, u64, u80 } = undefined;
|
||||
var i: usize = @intCast((ix >> 57) & 0x3F);
|
||||
if (even) i += 64;
|
||||
r[0] = @intCast(__rsqrt_tab[i]);
|
||||
r[0] <<= 16;
|
||||
// |r sqrt(m) - 1| < 0x1p-8
|
||||
s[0] = mul32(@intCast(m >> 48), r[0]);
|
||||
d[0] = mul32(s[0], r[0]);
|
||||
u[0] = three[0] - d[0];
|
||||
r[0] = mul32(u[0], r[0]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit
|
||||
r[1] = @intCast(r[0]);
|
||||
r[1] <<= 32;
|
||||
s[1] = mul64(@intCast(m >> 16), r[1]);
|
||||
d[1] = mul64(s[1], r[1]);
|
||||
u[1] = three[1] - d[1];
|
||||
r[1] = mul64(u[1], r[1]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.a5p-31
|
||||
s[1] = mul64(u[1], s[1]) << 1;
|
||||
d[1] = mul64(s[1], r[1]);
|
||||
u[1] = three[1] - d[1];
|
||||
r[1] = mul64(u[1], r[1]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.c001p-59, switch to 80bit
|
||||
r[2] = @intCast(r[1]);
|
||||
r[2] <<= 16;
|
||||
s[2] = mul80(m, r[2]);
|
||||
d[2] = mul80(s[2], r[2]);
|
||||
u[2] = three[2] - d[2];
|
||||
s[2] = mul80(u[2], s[2]); // repr: 3.77
|
||||
s[2] = (s[2] - 4) >> 14; // repr: 17.63
|
||||
// s < sqrt(m) < s + 1 ULP + tiny
|
||||
|
||||
// compute nearest rounded result:
|
||||
// the nearest result to 63 bits is either s or s+0x1p-63,
|
||||
// we can decide by comparing (2^63 s + 0.5)^2 to 2^126 m
|
||||
const d0 = (m << 48) -% mul80_tail(s[2], s[2]);
|
||||
const d1 = s[2] -% d0;
|
||||
const d2 = d1 +% s[2] +% 1;
|
||||
s[2] += d1 >> 79;
|
||||
s[2] &= 0x0000_7FFF_FFFF_FFFF_FFFF;
|
||||
s[2] |= 0x0000_8000_0000_0000_0000;
|
||||
s[2] |= top << 64;
|
||||
const y: f80 = @bitCast(s[2]);
|
||||
|
||||
// handle rounding modes and inexact exception:
|
||||
// only (s+1)^2 == 2^48 m case is exact otherwise
|
||||
// add a tiny value to cause the fenv effects.
|
||||
if (d2 != 0) {
|
||||
@branchHint(.likely);
|
||||
var tiny: u80 = 0x0001_8000_0000_0000_0000;
|
||||
tiny |= (d1 ^ d2) & 0x8000_0000_0000_0000_0000;
|
||||
const t: f80 = @bitCast(tiny);
|
||||
return y + t;
|
||||
}
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
pub fn sqrtq(x: f128) callconv(.c) f128 {
|
||||
// TODO: more correct implementation
|
||||
return sqrt(@floatCast(x));
|
||||
var ix: u128 = @bitCast(x);
|
||||
var top = ix >> 112;
|
||||
|
||||
// special case handling.
|
||||
if (top -% 0x0001 >= 0x7FFF - 0x0001) {
|
||||
@branchHint(.unlikely);
|
||||
// x < 0x1p-16382 or inf or nan.
|
||||
if (ix & 0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF == 0) return x;
|
||||
if (ix == 0x7FFF_0000_0000_0000_0000_0000_0000_0000) return x;
|
||||
if (ix > 0x7FFF_0000_0000_0000_0000_0000_0000_0000) return math.nan(f128);
|
||||
// x is subnormal, normalize it.
|
||||
ix = @bitCast(x * 0x1p112);
|
||||
top = (ix >> 112) -% 112;
|
||||
}
|
||||
|
||||
// argument reduction:
|
||||
// x = 4^e m; with integer e, and m in [1, 4)
|
||||
// m: fixed point representation [2.126]
|
||||
// 2^e is the exponent part of the result.
|
||||
const even = (top & 1) != 0;
|
||||
const m = if (even) (ix << 14) & 0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF else (ix << 15) | 0x8000_0000_0000_0000_0000_0000_0000_0000;
|
||||
top = (top +% 0x3FFF) >> 1;
|
||||
|
||||
// approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||
// the fixed point representations are
|
||||
// m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
|
||||
// and after switching to 64 bit
|
||||
// m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62
|
||||
// and after switching to 128 bit
|
||||
// m: 2.126 r: 0.128, s: 2.126, d: 2.126, u: 2.126, three: 2.126
|
||||
const three: struct { u32, u64, u128 } = .{
|
||||
0xC000_0000,
|
||||
0xC000_0000_0000_0000,
|
||||
0xC000_0000_0000_0000_0000_0000_0000_0000,
|
||||
};
|
||||
var r: struct { u32, u64, u128 } = undefined;
|
||||
var s: struct { u32, u64, u128 } = undefined;
|
||||
var d: struct { u32, u64, u128 } = undefined;
|
||||
var u: struct { u32, u64, u128 } = undefined;
|
||||
const i: usize = @intCast((ix >> 106) & 0x7F);
|
||||
r[0] = @intCast(__rsqrt_tab[i]);
|
||||
r[0] <<= 16;
|
||||
// |r sqrt(m) - 1| < 0x1p-8
|
||||
s[0] = mul32(@intCast(m >> 96), r[0]);
|
||||
d[0] = mul32(s[0], r[0]);
|
||||
u[0] = three[0] - d[0];
|
||||
r[0] = mul32(u[0], r[0]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit
|
||||
r[1] = @intCast(r[0]);
|
||||
r[1] <<= 32;
|
||||
s[1] = mul64(@intCast(m >> 64), r[1]);
|
||||
d[1] = mul64(s[1], r[1]);
|
||||
u[1] = three[1] - d[1];
|
||||
r[1] = mul64(u[1], r[1]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.a5p-31
|
||||
s[1] = mul64(u[1], s[1]) << 1;
|
||||
d[1] = mul64(s[1], r[1]);
|
||||
u[1] = three[1] - d[1];
|
||||
r[1] = mul64(u[1], r[1]) << 1;
|
||||
// |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit
|
||||
r[2] = @intCast(r[1]);
|
||||
r[2] <<= 64;
|
||||
s[2] = mul128(m, r[2]);
|
||||
d[2] = mul128(s[2], r[2]);
|
||||
u[2] = three[2] - d[2];
|
||||
s[2] = mul128(u[2], s[2]); // repr: 3.125
|
||||
// -0x1p-116 < s - sqrt(m) < 0x3.8001p-125
|
||||
s[2] = (s[2] - 4) >> 13; // repr: 16.122
|
||||
// s < sqrt(m) < s + 1 ULP + tiny
|
||||
|
||||
// compute nearest rounded result:
|
||||
// the nearest result to 122 bits is either s or s+0x1p-122,
|
||||
// we can decide by comparing (2^122 s + 0.5)^2 to 2^244 m
|
||||
const d0 = (m << 98) -% s[2] *% s[2];
|
||||
const d1 = s[2] -% d0;
|
||||
const d2 = d1 +% s[2] +% 1;
|
||||
s[2] += d1 >> 127;
|
||||
s[2] &= 0x0000_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF;
|
||||
s[2] |= top << 112;
|
||||
const y: f128 = @bitCast(s[2]);
|
||||
|
||||
// handle rounding modes and inexact exception:
|
||||
// only (s+1)^2 == 2^98 m case is exact otherwise
|
||||
// add a tiny value to cause the fenv effects.
|
||||
if (d2 != 0) {
|
||||
@branchHint(.likely);
|
||||
var tiny: u128 = 0x0001_0000_0000_0000_0000_0000_0000_0000;
|
||||
tiny |= (d1 ^ d2) & 0x8000_0000_0000_0000_0000_0000_0000_0000;
|
||||
const t: f128 = @bitCast(tiny);
|
||||
return y + t;
|
||||
}
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
fn _Qp_sqrt(c: *f128, a: *f128) callconv(.c) void {
|
||||
|
|
@ -259,60 +509,242 @@ pub fn sqrtl(x: c_longdouble) callconv(.c) c_longdouble {
|
|||
}
|
||||
}
|
||||
|
||||
test "sqrtf" {
|
||||
const V = [_]f32{
|
||||
0.0,
|
||||
4.089288054930154,
|
||||
7.538757127071935,
|
||||
8.97780793672623,
|
||||
5.304443821913729,
|
||||
5.682408965311888,
|
||||
0.5846878579110049,
|
||||
3.650338664297043,
|
||||
0.3178091951800732,
|
||||
7.1505232436382835,
|
||||
3.6589165881946464,
|
||||
const __rsqrt_tab: [128]u16 = .{
|
||||
0xB451, 0xB2F0, 0xB196, 0xB044, 0xAEF9, 0xADB6, 0xAC79, 0xAB43,
|
||||
0xAA14, 0xA8EB, 0xA7C8, 0xA6AA, 0xA592, 0xA480, 0xA373, 0xA26B,
|
||||
0xA168, 0xA06A, 0x9F70, 0x9E7B, 0x9D8A, 0x9C9D, 0x9BB5, 0x9AD1,
|
||||
0x99F0, 0x9913, 0x983A, 0x9765, 0x9693, 0x95C4, 0x94F8, 0x9430,
|
||||
0x936B, 0x92A9, 0x91EA, 0x912E, 0x9075, 0x8FBE, 0x8F0A, 0x8E59,
|
||||
0x8DAA, 0x8CFE, 0x8C54, 0x8BAC, 0x8B07, 0x8A64, 0x89C4, 0x8925,
|
||||
0x8889, 0x87EE, 0x8756, 0x86C0, 0x862B, 0x8599, 0x8508, 0x8479,
|
||||
0x83EC, 0x8361, 0x82D8, 0x8250, 0x81C9, 0x8145, 0x80C2, 0x8040,
|
||||
0xFF02, 0xFD0E, 0xFB25, 0xF947, 0xF773, 0xF5AA, 0xF3EA, 0xF234,
|
||||
0xF087, 0xEEE3, 0xED47, 0xEBB3, 0xEA27, 0xE8A3, 0xE727, 0xE5B2,
|
||||
0xE443, 0xE2DC, 0xE17A, 0xE020, 0xDECB, 0xDD7D, 0xDC34, 0xDAF1,
|
||||
0xD9B3, 0xD87B, 0xD748, 0xD61A, 0xD4F1, 0xD3CD, 0xD2AD, 0xD192,
|
||||
0xD07B, 0xCF69, 0xCE5B, 0xCD51, 0xCC4A, 0xCB48, 0xCA4A, 0xC94F,
|
||||
0xC858, 0xC764, 0xC674, 0xC587, 0xC49D, 0xC3B7, 0xC2D4, 0xC1F4,
|
||||
0xC116, 0xC03C, 0xBF65, 0xBE90, 0xBDBE, 0xBCEF, 0xBC23, 0xBB59,
|
||||
0xBA91, 0xB9CC, 0xB90A, 0xB84A, 0xB78C, 0xB6D0, 0xB617, 0xB560,
|
||||
};
|
||||
|
||||
// Note that @sqrt will either generate the sqrt opcode (if supported by the
|
||||
// target ISA) or a call to `sqrtf` otherwise.
|
||||
for (V) |val|
|
||||
try std.testing.expectEqual(@sqrt(val), sqrtf(val));
|
||||
inline fn mul16(a: u16, b: u16) u16 {
|
||||
return @intCast(@as(u32, @intCast(a)) * @as(u32, @intCast(b)) >> 16);
|
||||
}
|
||||
|
||||
test "sqrtf special" {
|
||||
try std.testing.expect(math.isPositiveInf(sqrtf(math.inf(f32))));
|
||||
try std.testing.expect(sqrtf(0.0) == 0.0);
|
||||
try std.testing.expect(sqrtf(-0.0) == -0.0);
|
||||
try std.testing.expect(math.isNan(sqrtf(-1.0)));
|
||||
inline fn mul32(a: u32, b: u32) u32 {
|
||||
return @intCast(@as(u64, @intCast(a)) * @as(u64, @intCast(b)) >> 32);
|
||||
}
|
||||
|
||||
inline fn mul64(a: u64, b: u64) u64 {
|
||||
return @intCast(@as(u128, @intCast(a)) * @as(u128, @intCast(b)) >> 64);
|
||||
}
|
||||
|
||||
inline fn mul80(a: u80, b: u80) u80 {
|
||||
const ahi = a >> 40;
|
||||
const alo = a & 0xFF_FFFF_FFFF;
|
||||
const bhi = b >> 40;
|
||||
const blo = b & 0xFF_FFFF_FFFF;
|
||||
return ahi * bhi + (ahi * blo >> 40) + (alo * bhi >> 40);
|
||||
}
|
||||
|
||||
inline fn mul128(a: u128, b: u128) u128 {
|
||||
const ahi = a >> 64;
|
||||
const alo = a & 0xFFFF_FFFF_FFFF_FFFF;
|
||||
const bhi = b >> 64;
|
||||
const blo = b & 0xFFFF_FFFF_FFFF_FFFF;
|
||||
return ahi * bhi + (ahi * blo >> 64) + (alo * bhi >> 64);
|
||||
}
|
||||
|
||||
inline fn mul80_tail(a: u80, b: u80) u80 {
|
||||
const ahi = a >> 40;
|
||||
const alo = a & 0xFF_FFFF_FFFF;
|
||||
const bhi = b >> 40;
|
||||
const blo = b & 0xFF_FFFF_FFFF;
|
||||
return alo * blo +% ((ahi * blo) << 40) +% ((alo * bhi) << 40);
|
||||
}
|
||||
|
||||
test "__sqrth" {
|
||||
// sqrt(±0) is ±0
|
||||
try std.testing.expectEqual(__sqrth(0x0.0p0), 0x0.0p0);
|
||||
try std.testing.expectEqual(__sqrth(-0x0.0p0), -0x0.0p0);
|
||||
// sqrt(+max) is finite
|
||||
try std.testing.expectEqual(__sqrth(0x1.FFCp15), 0x1.FFCp7);
|
||||
// sqrt(4)=2
|
||||
try std.testing.expectEqual(__sqrth(0x1p2), 0x1p1);
|
||||
// sqrt(x) for x=1, 1±ulp
|
||||
try std.testing.expectEqual(__sqrth(0x1p0), 0x1p0);
|
||||
try std.testing.expectEqual(__sqrth(0x1.004p0), 0x1p0);
|
||||
try std.testing.expectEqual(__sqrth(0x1.FF8p-1), 0x1.FFCp-1);
|
||||
// sqrt(+min) is non-zero
|
||||
try std.testing.expectEqual(__sqrth(0x1p-14), 0x1p-7);
|
||||
// sqrt(min subnormal) is non-zero
|
||||
try std.testing.expectEqual(__sqrth(0x0.004p-14), 0x1p-12);
|
||||
// sqrt(inf) is inf
|
||||
try std.testing.expect(math.isInf(__sqrth(math.inf(f16))));
|
||||
// sqrt(nan) is nan
|
||||
try std.testing.expect(math.isNan(__sqrth(math.nan(f16))));
|
||||
// sqrt(-ve) is nan
|
||||
try std.testing.expect(math.isNan(__sqrth(-0x1p-14)));
|
||||
try std.testing.expect(math.isNan(__sqrth(-0x1p+0)));
|
||||
try std.testing.expect(math.isNan(__sqrth(-math.inf(f16))));
|
||||
// random arguments
|
||||
try std.testing.expectEqual(__sqrth(0x1.1p14), 0x1.08p7);
|
||||
try std.testing.expectEqual(__sqrth(0x1.C9p-12), 0x1.56p-6);
|
||||
try std.testing.expectEqual(__sqrth(0x1.CE8p-7), 0x1.E68p-4);
|
||||
try std.testing.expectEqual(__sqrth(0x1.134p-7), 0x1.778p-4);
|
||||
try std.testing.expectEqual(__sqrth(0x1.E9Cp-10), 0x1.62p-5);
|
||||
try std.testing.expectEqual(__sqrth(0x1.3Dp9), 0x1.92Cp4);
|
||||
try std.testing.expectEqual(__sqrth(0x1.AA4p8), 0x1.4A4p4);
|
||||
try std.testing.expectEqual(__sqrth(0x1.8A8p4), 0x1.3DCp2);
|
||||
try std.testing.expectEqual(__sqrth(0x1.8Fp-7), 0x1.C4p-4);
|
||||
try std.testing.expectEqual(__sqrth(0x1.584p-11), 0x1.A3Cp-6);
|
||||
}
|
||||
|
||||
test "sqrtf" {
|
||||
// sqrt(±0) is ±0
|
||||
try std.testing.expectEqual(sqrtf(0x0.0p0), 0x0.0p0);
|
||||
try std.testing.expectEqual(sqrtf(-0x0.0p0), -0x0.0p0);
|
||||
// sqrt(+max) is finite
|
||||
try std.testing.expectEqual(sqrtf(0x1.FFFFFEp127), 0x1.FFFFFEp63);
|
||||
// sqrt(4)=2
|
||||
try std.testing.expectEqual(sqrtf(0x1p2), 0x1p1);
|
||||
// sqrt(x) for x=1, 1±ulp
|
||||
try std.testing.expectEqual(sqrtf(0x1p0), 0x1p0);
|
||||
try std.testing.expectEqual(sqrtf(0x1.000002p0), 0x1p0);
|
||||
try std.testing.expectEqual(sqrtf(0x1.FFFFFEp-1), 0x1.FFFFFEp-1);
|
||||
// sqrt(+min) is non-zero
|
||||
try std.testing.expectEqual(sqrtf(0x1p-126), 0x1p-63);
|
||||
// sqrt(min subnormal) is non-zero
|
||||
try std.testing.expectEqual(sqrtf(0x0.000002p-126), 0x1.6a09e6p-75);
|
||||
// sqrt(inf) is inf
|
||||
try std.testing.expect(math.isInf(sqrtf(math.inf(f32))));
|
||||
// sqrt(nan) is nan
|
||||
try std.testing.expect(math.isNan(sqrtf(math.nan(f32))));
|
||||
// sqrt(-ve) is nan
|
||||
try std.testing.expect(math.isNan(sqrtf(-0x1p-149)));
|
||||
try std.testing.expect(math.isNan(sqrtf(-0x1p0)));
|
||||
try std.testing.expect(math.isNan(sqrtf(-math.inf(f32))));
|
||||
// random arguments
|
||||
try std.testing.expectEqual(sqrtf(0x1.4DD57Ep77), 0x1.9D6DA8p38);
|
||||
try std.testing.expectEqual(sqrtf(0x1.871848p102), 0x1.3C6AFAp51);
|
||||
try std.testing.expectEqual(sqrtf(0x1.A1D748p-112), 0x1.470EFCp-56);
|
||||
try std.testing.expectEqual(sqrtf(0x1.E626C2p18), 0x1.60C80Ep9);
|
||||
try std.testing.expectEqual(sqrtf(0x1.E80E66p-29), 0x1.F3E282p-15);
|
||||
try std.testing.expectEqual(sqrtf(0x1.B47204p89), 0x1.D8B732p44);
|
||||
try std.testing.expectEqual(sqrtf(0x1.77F45p15), 0x1.B6BC3Ap7);
|
||||
try std.testing.expectEqual(sqrtf(0x1.AD5F5p-48), 0x1.4B8A72p-24);
|
||||
try std.testing.expectEqual(sqrtf(0x1.91A39p-76), 0x1.40A7A8p-38);
|
||||
try std.testing.expectEqual(sqrtf(0x1.DAE088p79), 0x1.ED16DCp39);
|
||||
}
|
||||
|
||||
test "sqrt" {
|
||||
const V = [_]f64{
|
||||
0.0,
|
||||
4.089288054930154,
|
||||
7.538757127071935,
|
||||
8.97780793672623,
|
||||
5.304443821913729,
|
||||
5.682408965311888,
|
||||
0.5846878579110049,
|
||||
3.650338664297043,
|
||||
0.3178091951800732,
|
||||
7.1505232436382835,
|
||||
3.6589165881946464,
|
||||
};
|
||||
|
||||
// Note that @sqrt will either generate the sqrt opcode (if supported by the
|
||||
// target ISA) or a call to `sqrtf` otherwise.
|
||||
for (V) |val|
|
||||
try std.testing.expectEqual(@sqrt(val), sqrt(val));
|
||||
}
|
||||
|
||||
test "sqrt special" {
|
||||
try std.testing.expect(math.isPositiveInf(sqrt(math.inf(f64))));
|
||||
try std.testing.expect(sqrt(0.0) == 0.0);
|
||||
try std.testing.expect(sqrt(-0.0) == -0.0);
|
||||
try std.testing.expect(math.isNan(sqrt(-1.0)));
|
||||
// sqrt(±0) is ±0
|
||||
try std.testing.expectEqual(sqrt(0x0.0p0), 0x0.0p0);
|
||||
try std.testing.expectEqual(sqrt(-0x0.0p0), -0x0.0p0);
|
||||
// sqrt(+max) is finite
|
||||
try std.testing.expectEqual(sqrt(math.floatMax(f64)), 0x1.FFFFFFFFFFFFFp511);
|
||||
// sqrt(4)=2
|
||||
try std.testing.expectEqual(sqrt(0x1p2), 0x1p1);
|
||||
// sqrt(x) for x=1, 1±ulp
|
||||
try std.testing.expectEqual(sqrt(0x1p0), 0x1p0);
|
||||
try std.testing.expectEqual(sqrt(0x1p0 + math.floatEps(f64)), 0x1p0);
|
||||
try std.testing.expectEqual(sqrt(0x1p0 - math.floatEps(f64)), 0x1.FFFFFFFFFFFFFp-1);
|
||||
// sqrt(+min) is non-zero
|
||||
try std.testing.expectEqual(sqrt(math.floatMin(f64)), 0x1p-511);
|
||||
// sqrt(min subnormal) is non-zero
|
||||
try std.testing.expectEqual(sqrt(math.floatTrueMin(f64)), 0x1p-537);
|
||||
// sqrt(inf) is inf
|
||||
try std.testing.expect(math.isInf(sqrt(math.inf(f64))));
|
||||
// sqrt(nan) is nan
|
||||
try std.testing.expect(math.isNan(sqrt(math.nan(f64))));
|
||||
// sqrt(-ve) is nan
|
||||
try std.testing.expect(math.isNan(sqrt(-0x1p-1074)));
|
||||
try std.testing.expect(math.isNan(sqrt(-0x1p0)));
|
||||
try std.testing.expect(math.isNan(sqrt(-math.inf(f64))));
|
||||
// random arguments
|
||||
try std.testing.expectEqual(sqrt(0x1.27D3510D4789Bp471), 0x1.852E97E58CFB7p235);
|
||||
try std.testing.expectEqual(sqrt(0x1.8C4FCD5A07846p791), 0x1.C27504E56D938p395);
|
||||
try std.testing.expectEqual(sqrt(0x1.B1B69324F96E7p-137), 0x1.D73BD0414D8BFp-69);
|
||||
try std.testing.expectEqual(sqrt(0x1.1CBD179A811FEp278), 0x1.0DFCB9A114A61p139);
|
||||
try std.testing.expectEqual(sqrt(0x1.1D0C7EFB04A56p917), 0x1.7E0708A25DDCDp458);
|
||||
try std.testing.expectEqual(sqrt(0x1.21B355DA8C94Bp-249), 0x1.8121CBE2608E3p-125);
|
||||
try std.testing.expectEqual(sqrt(0x1.63024D4C5E987p487), 0x1.AA56AEA589DCDp243);
|
||||
try std.testing.expectEqual(sqrt(0x1.45AC3BE941F6Ep339), 0x1.9857F3F453E2Dp169);
|
||||
try std.testing.expectEqual(sqrt(0x1.3B719C733AA24p267), 0x1.91E12E3AC8F71p133);
|
||||
try std.testing.expectEqual(sqrt(0x1.0B150433A2275p357), 0x1.71CAB87F8277Cp178);
|
||||
}
|
||||
|
||||
test "__sqrtx" {
|
||||
// sqrt(±0) is ±0
|
||||
try std.testing.expectEqual(__sqrtx(0x0.0p0), 0x0.0p0);
|
||||
try std.testing.expectEqual(__sqrtx(-0x0.0p0), -0x0.0p0);
|
||||
// sqrt(+max) is finite
|
||||
try std.testing.expectEqual(__sqrtx(math.floatMax(f80)), 0x1.FFFFFFFFFFFFFFFEp8191);
|
||||
// sqrt(4)=2
|
||||
try std.testing.expectEqual(__sqrtx(0x1p2), 0x1p1);
|
||||
// sqrt(x) for x=1, 1±ulp
|
||||
try std.testing.expectEqual(__sqrtx(0x1p0), 0x1p0);
|
||||
try std.testing.expectEqual(__sqrtx(0x1p0 + math.floatEps(f80)), 0x1p0);
|
||||
try std.testing.expectEqual(__sqrtx(0x1p0 - math.floatEps(f80)), 0x1.FFFFFFFFFFFFFFFEp-1);
|
||||
// sqrt(+min) is non-zero
|
||||
try std.testing.expectEqual(__sqrtx(math.floatMin(f80)), 0x1p-8191);
|
||||
// sqrt(min subnormal) is non-zero
|
||||
try std.testing.expectEqual(__sqrtx(math.floatTrueMin(f80)), 0x1.6A09E667F3BCC908p-8223);
|
||||
// sqrt(inf) is inf
|
||||
try std.testing.expect(math.isInf(__sqrtx(math.inf(f80))));
|
||||
// sqrt(nan) is nan
|
||||
try std.testing.expect(math.isNan(__sqrtx(math.nan(f80))));
|
||||
// sqrt(-ve) is nan
|
||||
try std.testing.expect(math.isNan(__sqrtx(-0x1p-16442)));
|
||||
try std.testing.expect(math.isNan(__sqrtx(-0x1p0)));
|
||||
try std.testing.expect(math.isNan(__sqrtx(-math.inf(f80))));
|
||||
// random arguments
|
||||
try std.testing.expectEqual(__sqrtx(0x1.087F3953486918A4p15482), 0x1.0436BBE03D02F32p7741);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.530CF9E2AE84D8Fp-6330), 0x1.269CFEF51933BE58p-3165);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.3F971515EADD574Ap5713), 0x1.9483232AB780B006p2856);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.4CC0DC7379222954p864), 0x1.23DD4D0A4758C2Cp432);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.920E5649559A839Ep-3181), 0x1.C5B5BC0F98DD83D2p-1591);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.2E59726F87CD1746p-629), 0x1.8973327E95CB350Cp-315);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.D3A16391F57B4D64p-9034), 0x1.59FF08B7DEEF5DB2p-4517);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.E7053D8DAA49BCEEp-11411), 0x1.F35AA3EA5E18E344p-5706);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.797ED0B05DD4A984p7521), 0x1.B7A22E40C6A7867Ap3760);
|
||||
try std.testing.expectEqual(__sqrtx(0x1.FC50806445C7226Ap15371), 0x1.FE2766142653F5BEp7685);
|
||||
}
|
||||
|
||||
test "sqrtq" {
|
||||
// sqrt(±0) is ±0
|
||||
try std.testing.expectEqual(sqrtq(0x0.0p0), 0x0.0p0);
|
||||
try std.testing.expectEqual(sqrtq(-0x0.0p0), -0x0.0p0);
|
||||
// sqrt(+max) is finite
|
||||
try std.testing.expectEqual(sqrtq(math.floatMax(f128)), 0x1.FFFFFFFFFFFFFFFFFFFFFFFFFFFFp8191);
|
||||
// sqrt(4)=2
|
||||
try std.testing.expectEqual(sqrtq(0x1p2), 0x1p1);
|
||||
// sqrt(x) for x=1, 1±ulp
|
||||
try std.testing.expectEqual(sqrtq(0x1p0), 0x1p0);
|
||||
try std.testing.expectEqual(sqrtq(0x1p0 + math.floatEps(f128)), 0x1p0);
|
||||
try std.testing.expectEqual(sqrtq(0x1p0 - math.floatEps(f128)), 0x1.FFFFFFFFFFFFFFFFFFFFFFFFFFFFp-1);
|
||||
// sqrt(+min) is non-zero
|
||||
try std.testing.expectEqual(sqrtq(math.floatMin(f128)), 0x1p-8191);
|
||||
// sqrt(min subnormal) is non-zero
|
||||
try std.testing.expectEqual(sqrtq(math.floatTrueMin(f128)), 0x1p-8247);
|
||||
// sqrt(inf) is inf
|
||||
try std.testing.expect(math.isInf(sqrtq(math.inf(f128))));
|
||||
// sqrt(nan) is nan
|
||||
try std.testing.expect(math.isNan(sqrtq(math.nan(f128))));
|
||||
// sqrt(-ve) is nan
|
||||
try std.testing.expect(math.isNan(sqrtq(-0x1p-16442)));
|
||||
try std.testing.expect(math.isNan(sqrtq(-0x1p0)));
|
||||
try std.testing.expect(math.isNan(sqrtq(-math.inf(f128))));
|
||||
// random arguments
|
||||
try std.testing.expectEqual(sqrtq(0x1.B6942D29A331751600C9F3AF7E5Fp3363), 0x1.D9DE9AFEF0F2D25586A50CA39D4Dp1681);
|
||||
try std.testing.expectEqual(sqrtq(0x1.5E65C405F84D471A8070ADD7A42Dp11765), 0x1.A78F7F9452B4D9EC2403C81D9D42p5882);
|
||||
try std.testing.expectEqual(sqrtq(0x1.B42334D68F8016D8AE6F5E22B044p-5624), 0x1.4E247A7F2FF2A325E9377BB09C8p-2812);
|
||||
try std.testing.expectEqual(sqrtq(0x1.E61715047F80F2E0B9382B38E06Bp10062), 0x1.60C25D9DFDC0116B78EF5AFDE0E9p5031);
|
||||
try std.testing.expectEqual(sqrtq(0x1.2ED0B53B494CB55A7B04E653D40Ep-1026), 0x1.166CE78D658D2453D700B04C5748p-513);
|
||||
try std.testing.expectEqual(sqrtq(0x1.1BA756B9790E78A4E6F0B083AA89p1835), 0x1.7D1767EA3303DB7A46940033988p917);
|
||||
try std.testing.expectEqual(sqrtq(0x1.5B6C574319C1120335C8E1609704p4512), 0x1.2A3A8A415BB1648C548FBA2A4182p2256);
|
||||
try std.testing.expectEqual(sqrtq(0x1.FF91E8CDEE1552A2B74E77B602Ep14953), 0x1.FFC8F171267D4FE75CBE7AB4D851p7476);
|
||||
try std.testing.expectEqual(sqrtq(0x1.9B1837CFC629A1B6B1BB97099E7Dp2892), 0x1.4468511B909EAF8641BD59105A6Bp1446);
|
||||
try std.testing.expectEqual(sqrtq(0x1.0E2115475E64A92340914E7F7B37p-13951), 0x1.73E536F82F414134012F55BA5368p-6976);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue