std/hash: add smhasher verification tests

Not all hashes are covered yet: the verification codes for the remaining
ones need to be generated manually from reference implementations, since
they are not included by default in smhasher.
This commit is contained in:
Marc Tiehuis 2023-09-01 19:09:05 +12:00
parent bb2eb44430
commit 26d61812a8
6 changed files with 59 additions and 72 deletions

View file

@ -342,64 +342,21 @@ pub const CityHash64 = struct {
}
};
/// Computes an SMHasher-style verification value for `hash_fn`.
///
/// `hash_fn` is invoked as `hash_fn(bytes, seed)` and its result is treated as
/// an integer. For every length 0..255 the key {0, 1, ..., length - 1} is
/// hashed with `256 - length` as the seed. The results are written
/// least-significant byte first into one combined buffer, which is then hashed
/// with seed 0. The low 32 bits of that final hash are returned.
fn SMHasherTest(comptime hash_fn: anytype) u32 {
    const Digest = @typeInfo(@TypeOf(hash_fn)).Fn.return_type.?;
    const digest_len = @sizeOf(Digest);

    var key = [_]u8{0} ** 256;
    var digests = [_]u8{0} ** (256 * digest_len);

    var len: u32 = 0;
    while (len < 256) : (len += 1) {
        key[len] = @as(u8, @intCast(len));
        var remaining: Digest = hash_fn(key[0..len], 256 - len);
        // Reinterpret casting is limited at comptime, so the digest is
        // serialized by hand, one byte at a time, lowest byte first.
        const out = digests[len * digest_len ..][0..digest_len];
        for (out) |*byte| {
            byte.* = @as(u8, @truncate(remaining));
            remaining = remaining >> 8;
        }
    }

    return @as(u32, @truncate(hash_fn(&digests, 0)));
}
/// Adapter that gives `CityHash32.hash` a `(bytes, seed)` signature so it can
/// be passed to helpers that always supply a seed. The seed is accepted and
/// discarded; only `str` influences the result.
fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
    const digest = CityHash32.hash(str);
    _ = seed;
    return digest;
}
const verify = @import("verify.zig");
// Checks the verification value of CityHash32 (seed ignored through
// CityHash32hashIgnoreSeed) against the constant 0x68254F81, both via the
// local SMHasherTest helper and via verify.smhasher.
test "cityhash32" {
const Test = struct {
// Runtime check using the local SMHasherTest helper.
fn doTest() !void {
// Note: SMHasher doesn't provide a 32bit version of the algorithm.
// Note: The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
try std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed), 0x68254F81);
}
};
try Test.doTest();
// TODO The comptime variant below is commented out to prevent OOM on the CI
// server. Re-enable this test case once we ship stage2.
//@setEvalBranchQuota(50000);
//comptime Test.doTest();
// Note: SMHasher doesn't provide a 32bit version of the algorithm.
// Note: The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(verify.smhasher(CityHash32hashIgnoreSeed), 0x68254F81);
}
// Checks the verification value of CityHash64.hashWithSeed against the
// constant 0x5FABC5C5, both via the local SMHasherTest helper and via
// verify.smhasher.
test "cityhash64" {
const Test = struct {
// Runtime check using the local SMHasherTest helper.
fn doTest() !void {
// Note: This is not compliant with the SMHasher implementation of CityHash64!
// Note: The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(SMHasherTest(CityHash64.hashWithSeed), 0x5FABC5C5);
}
};
try Test.doTest();
// TODO The comptime variant below is commented out to prevent OOM on the CI
// server. Re-enable this test case once we ship stage2.
//@setEvalBranchQuota(50000);
//comptime Test.doTest();
// Note: This is not compliant with the SMHasher implementation of CityHash64!
// Note: The implementation was verified against the Google Abseil version.
try std.testing.expectEqual(verify.smhasher(CityHash64.hashWithSeed), 0x5FABC5C5);
}

View file

@ -4,7 +4,7 @@
//
// https://tools.ietf.org/html/draft-eastlake-fnv-14
const std = @import("../std.zig");
const std = @import("std");
const testing = std.testing;
pub const Fnv1a_32 = Fnv1a(u32, 0x01000193, 0x811c9dc5);

View file

@ -279,26 +279,10 @@ pub const Murmur3_32 = struct {
}
};
/// Computes an SMHasher-style verification value for `hash_fn`.
///
/// `hash_fn` is invoked as `hash_fn(bytes, seed)`. For every length 0..255 the
/// key {0, 1, ..., length - 1} is hashed with `256 - length` as the seed, and
/// `hashbits / 8` bytes of each result are copied into a combined buffer in
/// little-endian order. That buffer is hashed with seed 0 and the low 32 bits
/// of the result are returned.
///
/// `hashbits` should be the bit width of the value returned by `hash_fn`,
/// since that many bits are copied out of each digest.
fn SMHasherTest(comptime hash_fn: anytype, comptime hashbits: u32) u32 {
    const digest_len = hashbits / 8;
    var key = [_]u8{0} ** 256;
    var digests = [_]u8{0} ** (digest_len * 256);

    var len: u32 = 0;
    while (len < 256) : (len += 1) {
        key[len] = @as(u8, @truncate(len));
        var digest = hash_fn(key[0..len], 256 - len);
        // Swap on big-endian hosts so the stored bytes are always little-endian.
        if (native_endian == .Big) digest = @byteSwap(digest);
        const digest_bytes = @as([*]u8, @ptrCast(&digest));
        @memcpy(digests[len * digest_len ..][0..digest_len], digest_bytes);
    }

    return @as(u32, @truncate(hash_fn(&digests, 0)));
}
const verify = @import("verify.zig");
test "murmur2_32" {
try testing.expectEqual(SMHasherTest(Murmur2_32.hashWithSeed, 32), 0x27864C1E);
try testing.expectEqual(verify.smhasher(Murmur2_32.hashWithSeed), 0x27864C1E);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@ -312,7 +296,7 @@ test "murmur2_32" {
}
test "murmur2_64" {
try std.testing.expectEqual(SMHasherTest(Murmur2_64.hashWithSeed, 64), 0x1F0D3804);
try std.testing.expectEqual(verify.smhasher(Murmur2_64.hashWithSeed), 0x1F0D3804);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;
@ -326,7 +310,7 @@ test "murmur2_64" {
}
test "murmur3_32" {
try std.testing.expectEqual(SMHasherTest(Murmur3_32.hashWithSeed, 32), 0xB0F57EE3);
try std.testing.expectEqual(verify.smhasher(Murmur3_32.hashWithSeed), 0xB0F57EE3);
var v0: u32 = 0x12345678;
var v1: u64 = 0x1234567812345678;
var v0le: u32 = v0;

35
lib/std/hash/verify.zig Normal file
View file

@ -0,0 +1,35 @@
const std = @import("std");
/// Calls `hash_fn` on `buf`, adapting to the shape of its parameter list.
///
/// - A single parameter: called as `hash_fn(buf)`; `seed` is not used.
/// - Several parameters with an integer first parameter: `hash_fn(seed, buf)`.
/// - Several parameters otherwise: `hash_fn(buf, seed)`.
///
/// `seed` is converted to the seed parameter's type with `@intCast`.
fn hashMaybeSeed(comptime hash_fn: anytype, seed: anytype, buf: []const u8) @typeInfo(@TypeOf(hash_fn)).Fn.return_type.? {
    const info = @typeInfo(@TypeOf(hash_fn)).Fn;

    if (info.params.len <= 1) {
        // Unseeded hash: only the input bytes are passed.
        return hash_fn(buf);
    } else if (@typeInfo(info.params[0].type.?) == .Int) {
        // Seed-first signature.
        return hash_fn(@intCast(seed), buf);
    } else {
        // Bytes-first signature.
        return hash_fn(buf, @intCast(seed));
    }
}
/// Returns a verification code computed the same way as the one used by
/// SMHasher.
///
/// For every N in 0..255 the key made of the first N bytes of the sequence
/// {0, 1, 2, ...} is hashed with 256 - N as the seed (so the first key is
/// empty), and each result is stored little-endian in a combined buffer. That
/// buffer is then hashed with seed 0; the low 32 bits of the result are the
/// verification code.
pub fn smhasher(comptime hash_fn: anytype) u32 {
    const Digest = @typeInfo(@TypeOf(hash_fn)).Fn.return_type.?;
    const digest_len = @sizeOf(Digest);

    var key: [256]u8 = undefined;
    var digests: [256 * digest_len]u8 = undefined;

    for (0..256) |len| {
        // key[len] is written before hashing but is not part of key[0..len].
        key[len] = @intCast(len);
        const out = digests[len * digest_len ..][0..digest_len];
        std.mem.writeIntLittle(Digest, out, hashMaybeSeed(hash_fn, 256 - len, key[0..len]));
    }

    return @truncate(hashMaybeSeed(hash_fn, 0, digests[0..]));
}

View file

@ -196,6 +196,7 @@ pub const Wyhash = struct {
}
};
const verify = @import("verify.zig");
const expectEqual = std.testing.expectEqual;
const TestVector = struct {
@ -229,6 +230,10 @@ test "test vectors at comptime" {
}
}
test "smhasher" {
    // Verification code of Wyhash.hash as computed by verify.smhasher.
    const code = verify.smhasher(Wyhash.hash);
    try expectEqual(code, 0xBD5E840C);
}
test "test vectors streaming" {
const step = 5;

View file

@ -438,6 +438,8 @@ fn validateType(comptime T: type) void {
}
}
const verify = @import("verify.zig");
fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64) !void {
try expectEqual(expected, H.hash(0, input));
@ -455,6 +457,8 @@ test "xxhash64" {
try testExpect(H, 0, "abcdefghijklmnopqrstuvwxyz", 0xcfe1f278fa89835c);
try testExpect(H, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0xaaa46907d3047814);
try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0xe04a477f19ee145d);
try expectEqual(verify.smhasher(H.hash), 0x024B7CF4);
}
test "xxhash32" {
@ -467,4 +471,6 @@ test "xxhash32" {
try testExpect(H, 0, "abcdefghijklmnopqrstuvwxyz", 0x63a14d5f);
try testExpect(H, 0, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0x9c285e64);
try testExpect(H, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 0x9c05f475);
try expectEqual(verify.smhasher(H.hash), 0xBA88B743);
}