This commit is contained in:
rpkak 2025-11-21 19:55:19 +01:00 committed by GitHub
commit c3eed6c9a8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 253 additions and 219 deletions

View file

@ -272,78 +272,77 @@ pub fn float(r: Random, comptime T: type) T {
// Generate a uniformly random value for the mantissa.
// Then generate an exponentially biased random value for the exponent.
// This covers every possible value in the range.
switch (T) {
f32 => {
// Use 23 random bits for the mantissa, and the rest for the exponent.
// If all 41 bits are zero, generate additional random bits, until a
// set bit is found, or 126 bits have been generated.
const rand = r.int(u64);
var rand_lz = @clz(rand);
if (rand_lz >= 41) {
@branchHint(.unlikely);
rand_lz = 41 + @clz(r.int(u64));
if (rand_lz == 41 + 64) {
@branchHint(.unlikely);
// It is astronomically unlikely to reach this point.
rand_lz += @clz(r.int(u32) | 0x7FF);
}
const TAsInt = std.meta.Int(.unsigned, @typeInfo(T).float.bits);
const fractional_bits = std.math.floatFractionalBits(T);
const max_lz = (1 << (std.math.floatExponentBits(T) - 1)) - 2;
const max_random_bits = max_lz + fractional_bits;
// It's not known yet how many bits are needed.
// The first call to r.int(...) will generate
// - a maximum of max_random_bits bits
// - more than fractional_bits bits
// - preferably a multiple of 64 bits
const first_random_bits = @min((fractional_bits | 63) + 1, max_random_bits);
const max_first_lz = first_random_bits - fractional_bits;
const rand = r.int(std.meta.Int(.unsigned, first_random_bits));
// Use fractional_bits random bits for the mantissa, and the rest for the exponent.
// If all bits of this rest are zero, generate additional random bits, until a
// set bit is found, or max_lz bits have been generated.
const mantissa: TAsInt = @intCast(rand & ((1 << fractional_bits) - 1));
var rand_lz: TAsInt = @clz(rand);
if (rand_lz >= max_first_lz) {
@branchHint(if (max_first_lz > 1) .unlikely else .none);
rand_lz = max_first_lz;
for (0..(max_lz - max_first_lz) / 64) |i| {
// It is astronomically unlikely for this loop to execute more than once.
rand_lz += @clz(r.int(u64));
if (rand_lz != 64 * (i + 1) + max_first_lz) {
@branchHint(.likely);
break;
}
const mantissa: u23 = @truncate(rand);
const exponent = @as(u32, 126 - rand_lz) << 23;
return @bitCast(exponent | mantissa);
},
f64 => {
// Use 52 random bits for the mantissa, and the rest for the exponent.
// If all 12 bits are zero, generate additional random bits, until a
// set bit is found, or 1022 bits have been generated.
const rand = r.int(u64);
var rand_lz: u64 = @clz(rand);
if (rand_lz >= 12) {
rand_lz = 12;
while (true) {
// It is astronomically unlikely for this loop to execute more than once.
const addl_rand_lz = @clz(r.int(u64));
rand_lz += addl_rand_lz;
if (addl_rand_lz != 64) {
@branchHint(.likely);
break;
}
if (rand_lz >= 1022) {
rand_lz = 1022;
break;
}
}
} else {
const missing_bits = (max_lz - max_first_lz) & 63;
std.debug.assert(max_lz - rand_lz == missing_bits);
if (missing_bits != 0) {
rand_lz += @clz(r.int(std.meta.Int(.unsigned, missing_bits)));
}
const mantissa = rand & 0xFFFFFFFFFFFFF;
const exponent = (1022 - rand_lz) << 52;
return @bitCast(exponent | mantissa);
},
else => @compileError("unknown floating point type"),
}
}
std.debug.assert(rand_lz <= max_lz);
const exponent = max_lz - rand_lz;
var float_as_int = (exponent << std.math.floatMantissaBits(T)) | mantissa;
if (@typeInfo(T).float.bits == 80) {
float_as_int |= @as(TAsInt, @intFromBool(exponent != 0)) << fractional_bits;
}
return @bitCast(float_as_int);
}
/// Return a floating point value normally distributed with mean = 0, stddev = 1.
///
/// To use different parameters, use: floatNorm(...) * desiredStddev + desiredMean.
pub fn floatNorm(r: Random, comptime T: type) T {
const value = ziggurat.next_f64(r, ziggurat.NormDist);
switch (T) {
f32 => return @floatCast(value),
f64 => return value,
else => @compileError("unknown floating point type"),
}
const tables = ziggurat.distributions(T).normal;
return ziggurat.next(T, r, tables);
}
/// Return an exponentially distributed float with a rate parameter of 1.
///
/// To use a different rate parameter, use: floatExp(...) / desiredRate.
pub fn floatExp(r: Random, comptime T: type) T {
const value = ziggurat.next_f64(r, ziggurat.ExpDist);
switch (T) {
f32 => return @floatCast(value),
f64 => return value,
else => @compileError("unknown floating point type"),
}
const tables = ziggurat.distributions(T).exponential;
return ziggurat.next(T, r, tables);
}
/// Shuffle a slice into a random order.

View file

@ -292,15 +292,13 @@ test "Random float correctness" {
var prng = DefaultPrng.init(0);
const random = prng.random();
var i: usize = 0;
while (i < 1000) : (i += 1) {
const val1 = random.float(f32);
try expect(val1 >= 0.0);
try expect(val1 < 1.0);
const val2 = random.float(f64);
try expect(val2 >= 0.0);
try expect(val2 < 1.0);
inline for (.{ f16, f32, f64, f80, c_longdouble, f128 }) |Float| {
var i: usize = 0;
while (i < 1000) : (i += 1) {
const val = random.float(Float);
try expect(val >= 0.0);
try expect(val < 1.0);
}
}
}
@ -309,74 +307,42 @@ test "Random float coverage" {
var prng = try Dilbert.init(&[_]u8{0});
const random = prng.random();
const rand_f64 = random.float(f64);
const rand_f32 = random.float(f32);
try expect(rand_f32 == 0.0);
try expect(rand_f64 == 0.0);
inline for (.{ f16, f32, f64, f80, c_longdouble, f128 }) |Float| {
const rand_float = random.float(Float);
try expect(rand_float == 0.0);
}
}
test "Random float chi-square goodness of fit" {
const num_numbers = 100000;
const num_buckets = 1000;
var f32_hist = std.AutoHashMap(u32, u32).init(std.testing.allocator);
defer f32_hist.deinit();
var f64_hist = std.AutoHashMap(u64, u32).init(std.testing.allocator);
defer f64_hist.deinit();
const num_buckets = 1024;
const expected_count_per_bucket = @as(f64, num_numbers) / @as(f64, num_buckets);
var prng = DefaultPrng.init(0);
const random = prng.random();
var i: usize = 0;
while (i < num_numbers) : (i += 1) {
const rand_f32 = random.float(f32);
const rand_f64 = random.float(f64);
const f32_put = try f32_hist.getOrPut(@as(u32, @intFromFloat(rand_f32 * @as(f32, @floatFromInt(num_buckets)))));
if (f32_put.found_existing) {
f32_put.value_ptr.* += 1;
} else {
f32_put.value_ptr.* = 1;
inline for (.{ f16, f32, f64, f80, c_longdouble, f128 }) |Float| {
var hist: [num_buckets]u32 = @splat(0);
for (0..num_numbers) |_| {
const rand_float = random.float(Float);
hist[@intFromFloat(rand_float * num_buckets)] += 1;
}
const f64_put = try f64_hist.getOrPut(@as(u32, @intFromFloat(rand_f64 * @as(f64, @floatFromInt(num_buckets)))));
if (f64_put.found_existing) {
f64_put.value_ptr.* += 1;
} else {
f64_put.value_ptr.* = 1;
var total_variance: f64 = 0;
for (hist) |count| {
const delta = @as(f64, @floatFromInt(count)) - expected_count_per_bucket;
const variance = (delta * delta) / expected_count_per_bucket;
total_variance += variance;
}
// Accept p-values >= 0.05.
// Critical value is calculated by opening a Python interpreter and running:
// scipy.stats.chi2.isf(0.05, num_buckets - 1)
const critical_value = 1098.5207819245886;
try expect(total_variance < critical_value);
}
var f32_total_variance: f64 = 0;
var f64_total_variance: f64 = 0;
{
var j: u32 = 0;
while (j < num_buckets) : (j += 1) {
const count = @as(f64, @floatFromInt((if (f32_hist.get(j)) |v| v else 0)));
const expected = @as(f64, @floatFromInt(num_numbers)) / @as(f64, @floatFromInt(num_buckets));
const delta = count - expected;
const variance = (delta * delta) / expected;
f32_total_variance += variance;
}
}
{
var j: u64 = 0;
while (j < num_buckets) : (j += 1) {
const count = @as(f64, @floatFromInt((if (f64_hist.get(j)) |v| v else 0)));
const expected = @as(f64, @floatFromInt(num_numbers)) / @as(f64, @floatFromInt(num_buckets));
const delta = count - expected;
const variance = (delta * delta) / expected;
f64_total_variance += variance;
}
}
// Accept p-values >= 0.05.
// Critical value is calculated by opening a Python interpreter and running:
// scipy.stats.chi2.isf(0.05, num_buckets - 1)
const critical_value = 1073.6426506574246;
try expect(f32_total_variance < critical_value);
try expect(f64_total_variance < critical_value);
}
test "Random shuffle" {

View file

@ -8,26 +8,33 @@
//! https://sbarral.github.io/etf.
const std = @import("../std.zig");
const builtin = @import("builtin");
const math = std.math;
const Random = std.Random;
pub fn next_f64(random: Random, comptime tables: ZigTable) f64 {
pub fn next(comptime T: type, random: Random, comptime tables: Table(T)) T {
const t_bits = @typeInfo(T).float.bits;
const mantissa_bits = math.floatMantissaBits(T);
const TAsInt = std.meta.Int(.unsigned, t_bits);
while (true) {
// We manually construct a float from parts as we can avoid an extra random lookup here by
// using the unused exponent for the lookup table entry.
const bits = random.int(u64);
const bits = random.int(std.meta.Int(.unsigned, mantissa_bits + 8)); // bits for mantissa and 8 for `i`
const i = @as(usize, @as(u8, @truncate(bits)));
const u = blk: {
// If symmetric, generate value in range [2, 4) and scale into [-1, 1),
// otherwise generate value in range [1, 2] and scale into (0, 1)
const mantissa: TAsInt = @intCast(bits >> 8);
const exponent: TAsInt = (math.floatExponentMax(T) + (if (tables.is_symmetric) 1 else 0)) << mantissa_bits;
const representation: TAsInt = switch (t_bits) {
80 => exponent | mantissa | (1 << (mantissa_bits - 1)),
else => exponent | mantissa,
};
if (tables.is_symmetric) {
// Generate a value in the range [2, 4) and scale into [-1, 1)
const repr = ((0x3ff + 1) << 52) | (bits >> 12);
break :blk @as(f64, @bitCast(repr)) - 3.0;
break :blk @as(T, @bitCast(representation)) - 3.0;
} else {
// Generate a value in the range [1, 2) and scale into (0, 1)
const repr = (0x3ff << 52) | (bits >> 12);
break :blk @as(f64, @bitCast(repr)) - (1.0 - math.floatEps(f64) / 2.0);
break :blk @as(T, @bitCast(representation)) - (1.0 - math.floatEps(T) / 2.0);
}
};
@ -40,51 +47,52 @@ pub fn next_f64(random: Random, comptime tables: ZigTable) f64 {
}
if (i == 0) {
return tables.zero_case(random, u);
return tables.zeroCase(random, u);
}
// equivalent to f1 + DRanU() * (f0 - f1) < 1
if (tables.f[i + 1] + (tables.f[i] - tables.f[i + 1]) * random.float(f64) < tables.pdf(x)) {
if (tables.f[i + 1] + (tables.f[i] - tables.f[i + 1]) * random.float(T) < tables.pdf(x)) {
return x;
}
}
}
pub const ZigTable = struct {
r: f64,
x: [257]f64,
f: [257]f64,
pub fn Table(comptime T: type) type {
comptime std.debug.assert(@typeInfo(T) == .float);
return struct {
x: [257]T,
f: [257]T,
// probability density function used as a fallback
pdf: fn (f64) f64,
// whether the distribution is symmetric
is_symmetric: bool,
// fallback calculation in the case we are in the 0 block
zero_case: fn (Random, f64) f64,
};
// probability density function used as a fallback
pdf: fn (T) T,
// whether the distribution is symmetric
is_symmetric: bool,
// fallback calculation in the case we are in the 0 block
zeroCase: fn (Random, T) T,
};
}
// zigNorInit
pub fn ZigTableGen(
pub fn tableGen(
comptime T: type,
comptime is_symmetric: bool,
comptime r: f64,
comptime v: f64,
comptime f: fn (f64) f64,
comptime f_inv: fn (f64) f64,
comptime zero_case: fn (Random, f64) f64,
) ZigTable {
var tables: ZigTable = undefined;
comptime r: T,
comptime v: T,
comptime f: fn (T) T,
comptime fInv: fn (T) T,
comptime zeroCase: fn (Random, T) T,
) Table(T) {
var tables: Table(T) = undefined;
tables.is_symmetric = is_symmetric;
tables.r = r;
tables.pdf = f;
tables.zero_case = zero_case;
tables.zeroCase = zeroCase;
tables.x[0] = v / f(r);
tables.x[1] = r;
for (tables.x[2..256], 0..) |*entry, i| {
const last = tables.x[2 + i - 1];
entry.* = f_inv(v / last + f(last));
entry.* = fInv(v / last + f(last));
}
tables.x[256] = 0;
@ -95,78 +103,139 @@ pub fn ZigTableGen(
return tables;
}
// N(0, 1)
pub const NormDist = blk: {
@setEvalBranchQuota(30000);
break :blk ZigTableGen(true, norm_r, norm_v, norm_f, norm_f_inv, norm_zero_case);
};
/// Namespace containing distributions for a specific floating point type.
pub fn distributions(comptime T: type) type {
comptime std.debug.assert(@typeInfo(T) == .float);
return struct {
pub const norm_r = 3.6541528853610088;
pub const norm_v = 0.00492867323399;
pub fn normF(x: T) T {
return @exp(-x * x / 2.0);
}
pub fn normFInv(y: T) T {
return @sqrt(-2.0 * @log(y));
}
pub fn normZeroCase(random: Random, u: T) T {
var x: T = 1.0;
var y: T = 0.0;
pub const norm_r = 3.6541528853610088;
pub const norm_v = 0.00492867323399;
while (-2.0 * y < x * x) {
x = @log(random.float(T)) / norm_r;
y = @log(random.float(T));
}
pub fn norm_f(x: f64) f64 {
return @exp(-x * x / 2.0);
}
pub fn norm_f_inv(y: f64) f64 {
return @sqrt(-2.0 * @log(y));
}
pub fn norm_zero_case(random: Random, u: f64) f64 {
var x: f64 = 1;
var y: f64 = 0;
if (u < 0) {
return x - norm_r;
} else {
return norm_r - x;
}
}
/// N(0, 1)
pub const normal = blk: {
@setEvalBranchQuota(30000);
break :blk tableGen(T, true, norm_r, norm_v, normF, normFInv, normZeroCase);
};
while (-2.0 * y < x * x) {
x = @log(random.float(f64)) / norm_r;
y = @log(random.float(f64));
}
if (u < 0) {
return x - norm_r;
} else {
return norm_r - x;
}
pub const exp_r = 7.69711747013104972;
pub const exp_v = 0.0039496598225815571993;
pub fn expF(x: T) T {
return @exp(-x);
}
pub fn expFInv(y: T) T {
return -@log(y);
}
pub fn expZeroCase(random: Random, _: T) T {
return exp_r - @log(random.float(T));
}
/// E(1)
pub const exponential = blk: {
@setEvalBranchQuota(30000);
break :blk tableGen(T, false, exp_r, exp_v, expF, expFInv, expZeroCase);
};
};
}
test "normal dist smoke test" {
// Hardcode 0 as the seed because it's possible a seed exists that fails
// this test.
var prng = Random.DefaultPrng.init(0);
const random = prng.random();
var i: usize = 0;
while (i < 1000) : (i += 1) {
_ = random.floatNorm(f64);
}
/// Deprecated. Use `next` instead.
pub fn next_f64(random: Random, comptime tables: Table(f64)) f64 {
return next(f64, random, tables);
}
// Exp(1)
pub const ExpDist = blk: {
@setEvalBranchQuota(30000);
break :blk ZigTableGen(false, exp_r, exp_v, exp_f, exp_f_inv, exp_zero_case);
};
pub const exp_r = 7.69711747013104972;
pub const exp_v = 0.0039496598225815571993;
pub fn exp_f(x: f64) f64 {
return @exp(-x);
}
pub fn exp_f_inv(y: f64) f64 {
return -@log(y);
}
pub fn exp_zero_case(random: Random, _: f64) f64 {
return exp_r - @log(random.float(f64));
/// Deprecated. Use `Table` instead.
pub const ZigTable = Table(f64);
/// Deprecated. Use `tableGen` instead.
pub fn ZigTableGen(
comptime is_symmetric: bool,
comptime r: f64,
comptime v: f64,
comptime f: fn (f64) f64,
comptime f_inv: fn (f64) f64,
comptime zero_case: fn (Random, f64) f64,
) Table(f64) {
return tableGen(f64, is_symmetric, r, v, f, f_inv, zero_case);
}
/// Deprecated. Use `distributions.normal` instead.
pub const NormDist = distributions(f64).normal;
/// Deprecated. Use `distributions.exponential` instead.
pub const ExpDist = distributions(f64).exponential;
test "exp dist smoke test" {
var prng = Random.DefaultPrng.init(0);
const random = prng.random();
fn zigguratTests(comptime T: type) type {
return struct {
test "normal dist correctness" {
const n = 10000;
const p = 0.682689492136; // chance of `random.floatNorm` [-1.0, 1.0]
const mu = n * p;
const sigma = @sqrt(n * p * (1.0 - p));
// interval that `in_range` will land in (inclusive) with 95% confidence
const in_range_min: u32 = @intFromFloat(@ceil(mu - 1.97 * sigma));
const in_range_max: u32 = @intFromFloat(@floor(mu + 1.97 * sigma));
var i: usize = 0;
while (i < 1000) : (i += 1) {
_ = random.floatExp(f64);
}
var prng = Random.DefaultPrng.init(switch (@typeInfo(T).float.bits) {
// By random chance, this fails for `f64` on seed `0`
// and for `f32` on seed `1`. Thus this setup.
64 => 1,
else => 0,
});
const random = prng.random();
var in_range: u32 = 0;
for (0..n) |_| {
const value = random.floatNorm(T);
if (value >= -1.0 and value <= 1.0) in_range += 1;
}
try std.testing.expect(in_range >= in_range_min);
try std.testing.expect(in_range <= in_range_max);
}
test "exponential dist correctness" {
const n = 10000;
const p = 0.5; // chance of `random.floatExp` < @log(2.0)
const mu = n * p;
const sigma = @sqrt(n * p * (1.0 - p));
// interval that `in_range` will land in (inclusive) with 95% confidence
const in_range_min: u32 = @intFromFloat(@ceil(mu - 1.97 * sigma));
const in_range_max: u32 = @intFromFloat(@floor(mu + 1.97 * sigma));
var prng = Random.DefaultPrng.init(0);
const random = prng.random();
var in_range: u32 = 0;
for (0..n) |_| {
const value = random.floatExp(T);
if (value < @log(2.0)) in_range += 1;
}
try std.testing.expect(in_range >= in_range_min);
try std.testing.expect(in_range <= in_range_max);
}
test "distributions" {
const dists = distributions(T);
_ = dists.normal;
_ = dists.exponential;
}
};
}
test {
_ = NormDist;
inline for ([_]type{ f16, f32, f64, f80, f128, c_longdouble }) |T| {
_ = zigguratTests(T);
}
}