From 00ff65357666a7ec161d6e80c92be2ccc79e3638 Mon Sep 17 00:00:00 2001 From: Marc Tiehuis Date: Thu, 8 Jun 2023 14:27:17 +1200 Subject: [PATCH] reimplement wyhash v4.1 (#15969) --- lib/std/hash/auto_hash.zig | 2 +- lib/std/hash/wyhash.zig | 373 ++++++++++++++++++++----------------- 2 files changed, 203 insertions(+), 172 deletions(-) diff --git a/lib/std/hash/auto_hash.zig b/lib/std/hash/auto_hash.zig index b78159ca0b..0c88caae7e 100644 --- a/lib/std/hash/auto_hash.zig +++ b/lib/std/hash/auto_hash.zig @@ -343,7 +343,7 @@ test "testHash optional" { const b: ?u32 = null; try testing.expectEqual(testHash(a), testHash(@as(u32, 123))); try testing.expect(testHash(a) != testHash(b)); - try testing.expectEqual(testHash(b), 0); + try testing.expectEqual(testHash(b), 0x409638ee2bde459); // wyhash empty input hash } test "testHash array" { diff --git a/lib/std/hash/wyhash.zig b/lib/std/hash/wyhash.zig index 682619e1a4..3426bca9f4 100644 --- a/lib/std/hash/wyhash.zig +++ b/lib/std/hash/wyhash.zig @@ -1,209 +1,240 @@ const std = @import("std"); -const mem = std.mem; -const primes = [_]u64{ - 0xa0761d6478bd642f, - 0xe7037ed1a0b428db, - 0x8ebc6af09c88c6e3, - 0x589965cc75374cc3, - 0x1d8e4e27c47d124f, -}; - -fn read_bytes(comptime bytes: u8, data: []const u8) u64 { - const T = std.meta.Int(.unsigned, 8 * bytes); - return mem.readIntLittle(T, data[0..bytes]); -} - -fn read_8bytes_swapped(data: []const u8) u64 { - return (read_bytes(4, data) << 32 | read_bytes(4, data[4..])); -} - -fn mum(a: u64, b: u64) u64 { - var r = std.math.mulWide(u64, a, b); - r = (r >> 64) ^ r; - return @truncate(u64, r); -} - -fn mix0(a: u64, b: u64, seed: u64) u64 { - return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]); -} - -fn mix1(a: u64, b: u64, seed: u64) u64 { - return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]); -} - -// Wyhash version which does not store internal state for handling partial buffers. 
-// This is needed so that we can maximize the speed for the short key case, which will -// use the non-iterative api which the public Wyhash exposes. -const WyhashStateless = struct { - seed: u64, - msg_len: usize, - - pub fn init(seed: u64) WyhashStateless { - return WyhashStateless{ - .seed = seed, - .msg_len = 0, - }; - } - - fn round(self: *WyhashStateless, b: []const u8) void { - std.debug.assert(b.len == 32); - - self.seed = mix0( - read_bytes(8, b[0..]), - read_bytes(8, b[8..]), - self.seed, - ) ^ mix1( - read_bytes(8, b[16..]), - read_bytes(8, b[24..]), - self.seed, - ); - } - - pub fn update(self: *WyhashStateless, b: []const u8) void { - std.debug.assert(b.len % 32 == 0); - - var off: usize = 0; - while (off < b.len) : (off += 32) { - @call(.always_inline, round, .{ self, b[off..][0..32] }); - } - - self.msg_len += b.len; - } - - pub fn final(self: *WyhashStateless, b: []const u8) u64 { - std.debug.assert(b.len < 32); - - const seed = self.seed; - const rem_len = @intCast(u5, b.len); - const rem_key = b[0..rem_len]; - - self.seed = switch (rem_len) { - 0 => seed, - 1 => mix0(read_bytes(1, rem_key), primes[4], seed), - 2 => mix0(read_bytes(2, rem_key), primes[4], seed), - 3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed), - 4 => mix0(read_bytes(4, rem_key), primes[4], seed), - 5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed), - 6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed), - 7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed), - 8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed), - 9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed), - 10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed), - 11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed), - 12 => 
mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed), - 13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed), - 14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed), - 15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed), - 16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed), - 17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed), - 18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed), - 19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed), - 20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed), - 21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed), - 22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed), - 23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed), - 24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed), - 25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, 
rem_key[24..]), seed), - 26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed), - 27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed), - 28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed), - 29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed), - 30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed), - 31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed), - }; - - self.msg_len += b.len; - return mum(self.seed ^ self.msg_len, primes[4]); - } - - pub fn hash(seed: u64, input: []const u8) u64 { - const aligned_len = input.len - (input.len % 32); - - var c = WyhashStateless.init(seed); - @call(.always_inline, update, .{ &c, input[0..aligned_len] }); - return @call(.always_inline, final, .{ &c, input[aligned_len..] }); - } -}; - -/// Fast non-cryptographic 64bit hash function. 
/// Fast non-cryptographic 64-bit hash function (wyhash v4.1).
/// See https://github.com/wangyi-fudan/wyhash
///
/// `hash` processes a complete input in one call. `init`/`update`/`final` process the
/// input incrementally and are intended to produce the same value as `hash` for the
/// same seed and the same concatenated bytes.
pub const Wyhash = struct {
    const secret = [_]u64{
        0xa0761d6478bd642f,
        0xe7037ed1a0b428db,
        0x8ebc6af09c88c6e3,
        0x589965cc75374cc3,
    };

    // Final two words fed to `final2`; written by `smallKey` or `final1`.
    a: u64,
    b: u64,
    // Three accumulator lanes, one per 16-byte third of a 48-byte block.
    state: [3]u64,
    // Total number of bytes passed to `update` (or the input length in `hash`).
    total_len: usize,

    // Bytes not yet consumed by `round`. After a round has been run, the tail of `buf`
    // additionally holds the last bytes of the previously consumed block whenever fewer
    // than 16 bytes are pending, because `final` needs 16 contiguous trailing bytes.
    buf: [48]u8,
    buf_len: usize,

    /// Creates a streaming hasher whose three state lanes are all derived from `seed`.
    pub fn init(seed: u64) Wyhash {
        var self = Wyhash{
            .a = undefined,
            .b = undefined,
            .state = undefined,
            .total_len = 0,
            .buf = undefined,
            .buf_len = 0,
        };

        self.state[0] = seed ^ mix(seed ^ secret[0], secret[1]);
        self.state[1] = self.state[0];
        self.state[2] = self.state[0];
        return self;
    }

    /// Appends `input` to the message being hashed.
    ///
    /// This is subtly different from other hash function update calls. Wyhash requires the
    /// last full 48-byte block to be run through final1 if it is exactly aligned to 48 bytes,
    /// so a block is only consumed by `round` once at least one further byte is known to
    /// follow it. `input` must not alias the hasher's internal buffer.
    pub fn update(self: *Wyhash, input: []const u8) void {
        self.total_len += input.len;

        // Everything fits into the pending buffer: just stash it.
        if (input.len <= 48 - self.buf_len) {
            @memcpy(self.buf[self.buf_len..][0..input.len], input);
            self.buf_len += input.len;
            return;
        }

        var i: usize = 0;

        // Top up a partially filled buffer and consume it as one block.
        if (self.buf_len > 0) {
            i = 48 - self.buf_len;
            @memcpy(self.buf[self.buf_len..][0..i], input[0..i]);
            self.round(&self.buf);
            self.buf_len = 0;
        }

        // Consume full blocks straight from `input`, always leaving 1..48 bytes pending.
        while (i + 48 < input.len) : (i += 48) {
            self.round(input[i..][0..48]);
        }

        const remaining_bytes = input[i..];
        // `final` reads the 16 bytes that end the message. When fewer than 16 bytes remain
        // it takes the missing ones from the tail of `buf`. That tail is only up to date if
        // the last round was run on `buf` itself; `i >= 48` means the last round read
        // directly from `input`, so the look-behind bytes have to be copied here.
        if (remaining_bytes.len < 16 and i >= 48) {
            const rem = 16 - remaining_bytes.len;
            @memcpy(self.buf[self.buf.len - rem ..], input[i - rem .. i]);
        }
        @memcpy(self.buf[0..remaining_bytes.len], remaining_bytes);
        self.buf_len = remaining_bytes.len;
    }

    /// Returns the hash of all bytes passed to `update` so far.
    ///
    /// The finalization runs on a copy of the state, so calling `final` repeatedly returns
    /// the same value and further `update` calls remain possible.
    pub fn final(self: *Wyhash) u64 {
        var input = self.buf[0..self.buf_len];
        var copy = self.shallowCopy(); // ensure idempotency

        // Declared at function scope: `input` may be re-pointed into this array and is read
        // by `final1` below, so the array has to stay alive until then.
        var scratch: [16]u8 = undefined;

        if (self.total_len <= 16) {
            copy.smallKey(input);
        } else {
            if (self.buf_len < 16) {
                // Fewer than 16 bytes pending: rebuild the last 16 bytes of the message from
                // the tail of the previous block (kept at the end of `buf`) plus the pending
                // bytes.
                const rem = 16 - self.buf_len;
                @memcpy(scratch[0..rem], self.buf[self.buf.len - rem ..][0..rem]);
                @memcpy(scratch[rem..][0..self.buf_len], self.buf[0..self.buf_len]);

                // Same as input with lookbehind to pad to 16-bytes
                input = scratch[rem..];
            }

            copy.final0();
            copy.final1(input);
        }

        return copy.final2();
    }

    // Copies the core wyhash state but not any internal buffers.
    inline fn shallowCopy(self: *Wyhash) Wyhash {
        return .{
            .a = self.a,
            .b = self.b,
            .state = self.state,
            .total_len = self.total_len,
            .buf = undefined,
            .buf_len = undefined,
        };
    }

    // Loads `a` and `b` for messages of at most 16 bytes.
    //  - 4..16 bytes: four (possibly overlapping) 32-bit reads taken from both ends.
    //  - 1..3 bytes: first, middle and last byte packed into `a`; `b` is zero.
    //  - 0 bytes: both are zero.
    inline fn smallKey(self: *Wyhash, input: []const u8) void {
        std.debug.assert(input.len <= 16);

        if (input.len >= 4) {
            const end = input.len - 4;
            const quarter = (input.len >> 3) << 2;
            self.a = (read(4, input[0..]) << 32) | read(4, input[quarter..]);
            self.b = (read(4, input[end..]) << 32) | read(4, input[end - quarter ..]);
        } else if (input.len > 0) {
            self.a = (@as(u64, input[0]) << 16) | (@as(u64, input[input.len >> 1]) << 8) | input[input.len - 1];
            self.b = 0;
        } else {
            self.a = 0;
            self.b = 0;
        }
    }

    // Consumes one 48-byte block: each 16-byte third updates its own state lane.
    inline fn round(self: *Wyhash, input: *const [48]u8) void {
        inline for (0..3) |i| {
            const a = read(8, input[8 * (2 * i) ..]);
            const b = read(8, input[8 * (2 * i + 1) ..]);
            self.state[i] = mix(a ^ secret[i + 1], b ^ self.state[i]);
        }
    }

    // Reads the first `bytes` (at most 8) bytes of `data` as a little-endian integer,
    // widened to u64.
    inline fn read(comptime bytes: usize, data: []const u8) u64 {
        std.debug.assert(bytes <= 8);
        const T = std.meta.Int(.unsigned, 8 * bytes);
        return @as(u64, std.mem.readIntLittle(T, data[0..bytes]));
    }

    // 64x64 -> 128 bit multiply; stores the low half in `a` and the high half in `b`.
    inline fn mum(a: *u64, b: *u64) void {
        const x = @as(u128, a.*) *% b.*;
        a.* = @truncate(u64, x);
        b.* = @truncate(u64, x >> 64);
    }

    // Folds the 128-bit product of the two operands into 64 bits (low half xor high half).
    inline fn mix(a_: u64, b_: u64) u64 {
        var a = a_;
        var b = b_;
        mum(&a, &b);
        return a ^ b;
    }

    // Collapses the three state lanes into lane 0.
    inline fn final0(self: *Wyhash) void {
        self.state[0] ^= self.state[1] ^ self.state[2];
    }

    // Absorbs the trailing bytes of the message (at most 48) and loads `a`/`b` from the
    // 16 bytes that end at the end of `input`.
    //
    // At least 16 readable bytes must end at the end of `input`. The slice passed may be
    // shorter than that and offset into its backing memory, in which case this function
    // reads bytes located in front of the start of the slice.
    inline fn final1(self: *Wyhash, input: []const u8) void {
        std.debug.assert(input.len <= 48);

        var i: usize = 0;
        while (i + 16 < input.len) : (i += 16) {
            self.state[0] = mix(read(8, input[i..]) ^ secret[1], read(8, input[i + 8 ..]) ^ self.state[0]);
        }

        // Possible lookbehind past pointer start.
        self.a = read(8, (input.ptr + input.len - 16)[0..8]);
        self.b = read(8, (input.ptr + input.len - 8)[0..8]);
    }

    // Combines `a`, `b`, state lane 0 and the total message length into the result.
    inline fn final2(self: *Wyhash) u64 {
        self.a ^= secret[1];
        self.b ^= self.state[0];
        mum(&self.a, &self.b);
        return mix(self.a ^ secret[0] ^ self.total_len, self.b ^ secret[1]);
    }

    /// Hashes `input` in one call using `seed`.
    pub fn hash(seed: u64, input: []const u8) u64 {
        var self = Wyhash.init(seed);

        if (input.len <= 16) {
            self.smallKey(input);
        } else {
            var i: usize = 0;
            if (input.len >= 48) {
                // Leave 1..48 trailing bytes for final1.
                while (i + 48 < input.len) : (i += 48) {
                    self.round(input[i..][0..48]);
                }
                self.final0();
            }
            // When i > 0 the slice may be shorter than 16 bytes; final1 then looks behind
            // into the part of `input` that was already consumed.
            self.final1(input[i..]);
        }

        self.total_len = input.len;
        return self.final2();
    }
};

const expectEqual = std.testing.expectEqual;

const TestVector = struct {
    expected: u64,
    seed: u64,
    input: []const u8,
};

// Regression test: blocks consumed directly from the caller's slice must still leave the
// look-behind bytes that `final` needs when fewer than 16 bytes remain pending.
test "streaming update spanning whole blocks matches one-shot hash" {
    var data: [160]u8 = undefined;
    var fill: u8 = 3;
    for (&data) |*byte| {
        byte.* = fill;
        fill +%= 29;
    }

    var len: usize = 0;
    while (len <= data.len) : (len += 1) {
        var wh = Wyhash.init(9);
        wh.update(data[0..len]);
        try expectEqual(Wyhash.hash(9, data[0..len]), wh.final());
    }
}

// Run https://github.com/wangyi-fudan/wyhash/blob/77e50f267fbc7b8e2d09f2d455219adb70ad4749/test_vector.cpp directly.
+const vectors = [_]TestVector{ + .{ .seed = 0, .expected = 0x409638ee2bde459, .input = "" }, + .{ .seed = 1, .expected = 0xa8412d091b5fe0a9, .input = "a" }, + .{ .seed = 2, .expected = 0x32dd92e4b2915153, .input = "abc" }, + .{ .seed = 3, .expected = 0x8619124089a3a16b, .input = "message digest" }, + .{ .seed = 4, .expected = 0x7a43afb61d7f5f40, .input = "abcdefghijklmnopqrstuvwxyz" }, + .{ .seed = 5, .expected = 0xff42329b90e50d58, .input = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" }, + .{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" }, +}; + +test "test vectors" { + for (vectors) |e| { + try expectEqual(e.expected, Wyhash.hash(e.seed, e.input)); + } } test "test vectors streaming" { - var wh = Wyhash.init(5); - for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |e| { - wh.update(mem.asBytes(&e)); - } - try expectEqual(wh.final(), 0x602a1894d3bbfe7f); + const step = 5; - const pattern = "1234567890"; - const count = 8; - const result = 0x829e9c148b75970e; - try expectEqual(Wyhash.hash(6, pattern ** 8), result); - - wh = Wyhash.init(6); - var i: u32 = 0; - while (i < count) : (i += 1) { - wh.update(pattern); + for (vectors) |e| { + var wh = Wyhash.init(e.seed); + var i: usize = 0; + while (i < e.input.len) : (i += step) { + const len = if (i + step > e.input.len) e.input.len - i else step; + wh.update(e.input[i..][0..len]); + } + try expectEqual(e.expected, wh.final()); + } +} + +test "test ensure idempotent final call" { + const e: TestVector = .{ .seed = 6, .expected = 0xc39cab13b115aad3, .input = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" }; + var wh = Wyhash.init(e.seed); + wh.update(e.input); + + for (0..10) |_| { + try expectEqual(e.expected, wh.final()); } - try expectEqual(wh.final(), result); } test "iterative non-divisible update" {