mirror of
https://github.com/zigzap/zap.git
synced 2025-10-20 15:14:08 +00:00
893 lines
29 KiB
C
893 lines
29 KiB
C
/*
|
|
Copyright: Boaz Segev, 2018-2019
|
|
License: MIT
|
|
|
|
Feel free to copy, use and enjoy according to the license provided.
|
|
*/
|
|
#define FIO_INCLUDE_STR
|
|
#include <fio.h>
|
|
#include <fio_cli.h>
|
|
|
|
#ifndef TEST_XXHASH
|
|
#define TEST_XXHASH 1
|
|
#endif
|
|
|
|
/* *****************************************************************************
|
|
State machine and types
|
|
***************************************************************************** */
|
|
|
|
static uint8_t print_flag = 1;
|
|
|
|
static inline int fio_str_eq_print(fio_str_s *a, fio_str_s *b) {
|
|
/* always return 1, to avoid internal set collision mitigation. */
|
|
if (print_flag)
|
|
fprintf(stderr, "* Collision Detected: %s vs. %s\n", fio_str_data(a),
|
|
fio_str_data(b));
|
|
return 1;
|
|
}
|
|
|
|
// static inline void destroy_collision_object(fio_str_s *a) {
|
|
// fprintf(stderr, "* Collision Detected: %s\n", fio_str_data(a));
|
|
// fio_str_free2(a);
|
|
// }
|
|
|
|
#define FIO_SET_NAME collisions
|
|
#define FIO_SET_OBJ_TYPE fio_str_s *
|
|
#define FIO_SET_OBJ_COPY(dest, src) ((dest) = fio_str_new_copy2((src)))
|
|
#define FIO_SET_OBJ_COMPARE(a, b) fio_str_eq_print((a), (b))
|
|
#define FIO_SET_OBJ_DESTROY(a) fio_str_free2((a))
|
|
#include <fio.h>
|
|
|
|
typedef uintptr_t (*hashing_func_fn)(char *, size_t);
|
|
#define FIO_SET_NAME hash_name
|
|
#define FIO_SET_OBJ_TYPE hashing_func_fn
|
|
#include <fio.h>
|
|
|
|
#define FIO_ARY_NAME words
|
|
#define FIO_ARY_TYPE fio_str_s
|
|
#define FIO_ARY_COMPARE(a, b) fio_str_iseq(&(a), &(b))
|
|
#define FIO_ARY_COPY(dest, src) \
|
|
do { \
|
|
fio_str_clear(&(dest)), fio_str_concat(&(dest), &(src)); \
|
|
} while (0)
|
|
#define FIO_ARY_DESTROY(a) fio_str_free((&a))
|
|
#include <fio.h>
|
|
|
|
static hash_name_s hash_names = FIO_SET_INIT;
|
|
static words_s words = FIO_SET_INIT;
|
|
|
|
/* *****************************************************************************
|
|
Main
|
|
***************************************************************************** */
|
|
|
|
static void test_hash_function(hashing_func_fn h);
|
|
static void initialize_cli(int argc, char const *argv[]);
|
|
static void load_words(void);
|
|
static void initialize_hash_names(void);
|
|
static void print_hash_names(void);
|
|
static char *hash_name(hashing_func_fn fn);
|
|
static void cleanup(void);
|
|
|
|
int main(int argc, char const *argv[]) {
|
|
// FIO_LOG_LEVEL = FIO_LOG_LEVEL_DEBUG;
|
|
initialize_cli(argc, argv);
|
|
load_words();
|
|
initialize_hash_names();
|
|
if (fio_cli_get("-t")) {
|
|
fio_str_s tmp = FIO_STR_INIT_STATIC(fio_cli_get("-t"));
|
|
hashing_func_fn h = hash_name_find(&hash_names, fio_str_hash(&tmp), NULL);
|
|
if (h) {
|
|
test_hash_function(h);
|
|
} else {
|
|
FIO_LOG_ERROR("Test function %s unknown.", tmp.data);
|
|
fprintf(stderr, "Try any of the following:\n");
|
|
print_hash_names();
|
|
}
|
|
} else {
|
|
FIO_SET_FOR_LOOP(&hash_names, pos) { test_hash_function(pos->obj); }
|
|
}
|
|
cleanup();
|
|
return 0;
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
CLI
|
|
***************************************************************************** */
|
|
|
|
static void initialize_cli(int argc, char const *argv[]) {
|
|
fio_cli_start(
|
|
argc, argv, 0, 0,
|
|
"This is a Hash algorythm collision test program. It accepts the "
|
|
"following arguments:",
|
|
FIO_CLI_STRING(
|
|
"-test -t test only the specified algorithm. Options include:"),
|
|
FIO_CLI_PRINT("\t\tsiphash13"), FIO_CLI_PRINT("\t\tsiphash24"),
|
|
FIO_CLI_PRINT("\t\tsha1"),
|
|
FIO_CLI_PRINT("\t\trisky (fio_str_hash_risky)"),
|
|
FIO_CLI_PRINT("\t\trisky2 (fio_str_hash_risky alternative)"),
|
|
// FIO_CLI_PRINT("\t\txor (xor all bytes and length)"),
|
|
FIO_CLI_STRING(
|
|
"-dictionary -d a text file containing words separated by an "
|
|
"EOL marker."),
|
|
FIO_CLI_BOOL("-v make output more verbouse (debug mode)"));
|
|
if (fio_cli_get_bool("-v"))
|
|
FIO_LOG_LEVEL = FIO_LOG_LEVEL_DEBUG;
|
|
FIO_LOG_DEBUG("initialized CLI.");
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Dictionary management
|
|
***************************************************************************** */
|
|
|
|
static void add_bad_words(void);
|
|
static void load_words(void) {
|
|
add_bad_words();
|
|
fio_str_s filename = FIO_STR_INIT;
|
|
fio_str_s data = FIO_STR_INIT;
|
|
|
|
if (fio_cli_get("-d")) {
|
|
fio_str_write(&filename, fio_cli_get("-d"), strlen(fio_cli_get("-d")));
|
|
} else {
|
|
fio_str_info_s tmp = fio_str_write(&filename, __FILE__, strlen(__FILE__));
|
|
while (tmp.len && tmp.data[tmp.len - 1] != '/') {
|
|
--tmp.len;
|
|
}
|
|
fio_str_resize(&filename, tmp.len);
|
|
fio_str_write(&filename, "words.txt", 9);
|
|
}
|
|
fio_str_readfile(&data, fio_str_data(&filename), 0, 0);
|
|
fio_str_info_s d = fio_str_info(&data);
|
|
if (d.len == 0) {
|
|
FIO_LOG_FATAL("Couldn't find / read dictionary file (or no words?)");
|
|
FIO_LOG_FATAL("\tmissing or empty: %s", fio_str_data(&filename));
|
|
cleanup();
|
|
fio_str_free(&filename);
|
|
exit(-1);
|
|
}
|
|
while (d.len) {
|
|
char *eol = memchr(d.data, '\n', d.len);
|
|
if (!eol) {
|
|
/* push what's left */
|
|
words_push(&words, FIO_STR_INIT_STATIC2(d.data, d.len));
|
|
break;
|
|
}
|
|
if (eol == d.data || (eol == d.data + 1 && eol[-1] == '\r')) {
|
|
/* empty line */
|
|
++d.data;
|
|
--d.len;
|
|
continue;
|
|
}
|
|
words_push(&words, FIO_STR_INIT_STATIC2(
|
|
d.data, (eol - (d.data + (eol[-1] == '\r')))));
|
|
d.len -= (eol + 1) - d.data;
|
|
d.data = eol + 1;
|
|
}
|
|
fio_free(&filename);
|
|
fio_free(&data);
|
|
FIO_LOG_INFO("Loaded %zu words.", words_count(&words));
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Cleanup
|
|
***************************************************************************** */
|
|
|
|
static void cleanup(void) {
|
|
print_flag = 0;
|
|
hash_name_free(&hash_names);
|
|
words_free(&words);
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Hash functions
|
|
***************************************************************************** */
|
|
|
|
static uintptr_t siphash13(char *data, size_t len) {
|
|
return fio_siphash13(data, len, 0, 0);
|
|
}
|
|
|
|
static uintptr_t siphash24(char *data, size_t len) {
|
|
return fio_siphash24(data, len, 0, 0);
|
|
}
|
|
static uintptr_t sha1(char *data, size_t len) {
|
|
fio_sha1_s s = fio_sha1_init();
|
|
fio_sha1_write(&s, data, len);
|
|
return ((uintptr_t *)fio_sha1_result(&s))[0];
|
|
}
|
|
static uintptr_t counter(char *data, size_t len) {
|
|
static uintptr_t counter = 0;
|
|
const size_t len_256 = len & (((size_t)-1) << 5);
|
|
|
|
for (size_t i = 0; i < len_256; i += 8) {
|
|
/* vectorized 32 bytes / 256 bit access */
|
|
uint64_t t0 = fio_str2u64(data);
|
|
uint64_t t1 = fio_str2u64(data + 8);
|
|
uint64_t t2 = fio_str2u64(data + 16);
|
|
uint64_t t3 = fio_str2u64(data + 24);
|
|
__asm__ volatile("" ::: "memory");
|
|
(void)t0;
|
|
(void)t1;
|
|
(void)t2;
|
|
(void)t3;
|
|
data += 32;
|
|
}
|
|
uint64_t tmp;
|
|
/* 64 bit words */
|
|
switch (len & 24) {
|
|
case 24:
|
|
tmp = fio_str2u64(data + 16);
|
|
__asm__ volatile("" ::: "memory");
|
|
case 16: /* overflow */
|
|
tmp = fio_str2u64(data + 8);
|
|
__asm__ volatile("" ::: "memory");
|
|
case 8: /* overflow */
|
|
tmp = fio_str2u64(data);
|
|
__asm__ volatile("" ::: "memory");
|
|
data += len & 24;
|
|
}
|
|
|
|
tmp = 0;
|
|
/* leftover bytes */
|
|
switch ((len & 7)) {
|
|
case 7: /* overflow */
|
|
tmp |= ((uint64_t)data[6]) << 8;
|
|
case 6: /* overflow */
|
|
tmp |= ((uint64_t)data[5]) << 16;
|
|
case 5: /* overflow */
|
|
tmp |= ((uint64_t)data[4]) << 24;
|
|
case 4: /* overflow */
|
|
tmp |= ((uint64_t)data[3]) << 32;
|
|
case 3: /* overflow */
|
|
tmp |= ((uint64_t)data[2]) << 40;
|
|
case 2: /* overflow */
|
|
tmp |= ((uint64_t)data[1]) << 48;
|
|
case 1: /* overflow */
|
|
tmp |= ((uint64_t)data[0]) << 56;
|
|
}
|
|
__asm__ volatile("" ::: "memory");
|
|
return ++counter;
|
|
}
|
|
|
|
#if TEST_XXHASH
|
|
#include "xxhash.h"
|
|
static uintptr_t xxhash_test(char *data, size_t len) {
|
|
return XXH64(data, len, 0);
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
Working version.
|
|
*/
|
|
inline FIO_FUNC uintptr_t fio_risky_hash2(const void *data, size_t len,
|
|
uint64_t salt);
|
|
|
|
inline FIO_FUNC uintptr_t risky2(char *data, size_t len) {
|
|
return fio_risky_hash2(data, len, 0);
|
|
}
|
|
|
|
inline FIO_FUNC uintptr_t risky(char *data, size_t len) {
|
|
return fio_risky_hash(data, len, 0);
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Hash setup and testing...
|
|
***************************************************************************** */
|
|
|
|
struct hash_fn_names_s {
|
|
char *name;
|
|
hashing_func_fn fn;
|
|
} hash_fn_list[] = {
|
|
{"counter (no hash, RAM access test)", counter},
|
|
{"siphash13", siphash13},
|
|
{"siphash24", siphash24},
|
|
{"sha1", sha1},
|
|
#if TEST_XXHASH
|
|
{"xxhash", xxhash_test},
|
|
#endif
|
|
{"risky", risky},
|
|
{"risky2", risky2},
|
|
{NULL, NULL},
|
|
};
|
|
|
|
static void initialize_hash_names(void) {
|
|
for (size_t i = 0; hash_fn_list[i].name; ++i) {
|
|
fio_str_s tmp = FIO_STR_INIT_STATIC(hash_fn_list[i].name);
|
|
hash_name_insert(&hash_names, fio_str_hash(&tmp), hash_fn_list[i].fn);
|
|
FIO_LOG_DEBUG("Registered %s hashing function.\n\t\t(%zu registered)",
|
|
hash_fn_list[i].name, hash_name_count(&hash_names));
|
|
}
|
|
}
|
|
|
|
static char *hash_name(hashing_func_fn fn) {
|
|
for (size_t i = 0; hash_fn_list[i].name; ++i) {
|
|
if (hash_fn_list[i].fn == fn)
|
|
return hash_fn_list[i].name;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void print_hash_names(void) {
|
|
for (size_t i = 0; hash_fn_list[i].name; ++i) {
|
|
fprintf(stderr, "* %s\n", hash_fn_list[i].name);
|
|
}
|
|
}
|
|
|
|
static void test_hash_function_speed(hashing_func_fn h, char *name) {
|
|
FIO_LOG_DEBUG("Speed testing for %s", name);
|
|
/* test based on code from BearSSL with credit to Thomas Pornin */
|
|
uint8_t buffer[8192];
|
|
memset(buffer, 'T', sizeof(buffer));
|
|
/* warmup */
|
|
uint64_t hash = 0;
|
|
for (size_t i = 0; i < 4; i++) {
|
|
hash += h((char *)buffer, sizeof(buffer));
|
|
memcpy(buffer, &hash, sizeof(hash));
|
|
}
|
|
/* loop until test runs for more than 2 seconds */
|
|
for (uint64_t cycles = (8192 << 4);;) {
|
|
clock_t start, end;
|
|
start = clock();
|
|
for (size_t i = cycles; i > 0; i--) {
|
|
hash += h((char *)buffer, sizeof(buffer));
|
|
__asm__ volatile("" ::: "memory");
|
|
}
|
|
end = clock();
|
|
memcpy(buffer, &hash, sizeof(hash));
|
|
if ((end - start) >= (2 * CLOCKS_PER_SEC) ||
|
|
cycles >= ((uint64_t)1 << 62)) {
|
|
fprintf(stderr, "%-20s %8.2f MB/s\n", name,
|
|
(double)(sizeof(buffer) * cycles) /
|
|
(((end - start) * (1000000.0 / CLOCKS_PER_SEC))));
|
|
break;
|
|
}
|
|
cycles <<= 2;
|
|
}
|
|
}
|
|
|
|
static void test_hash_function(hashing_func_fn h) {
|
|
size_t best_count = 0, best_capa = 1024;
|
|
#define test_for_best() \
|
|
if (collisions_capa(&c) > 1024 && \
|
|
(collisions_count(&c) * (double)1 / collisions_capa(&c)) > \
|
|
(best_count * (double)1 / best_capa)) { \
|
|
best_count = collisions_count(&c); \
|
|
best_capa = collisions_capa(&c); \
|
|
}
|
|
char *name = NULL;
|
|
for (size_t i = 0; hash_fn_list[i].name; ++i) {
|
|
if (hash_fn_list[i].fn == h) {
|
|
name = hash_fn_list[i].name;
|
|
break;
|
|
}
|
|
}
|
|
if (!name)
|
|
name = "unknown";
|
|
fprintf(stderr, "======= %s\n", name);
|
|
/* Speed test */
|
|
test_hash_function_speed(h, name);
|
|
/* Collision test */
|
|
collisions_s c = FIO_SET_INIT;
|
|
size_t count = 0;
|
|
FIO_ARY_FOR(&words, w) {
|
|
fio_str_info_s i = fio_str_info(w);
|
|
// fprintf(stderr, "%s\n", i.data);
|
|
printf("\33[2K [%zu] %s\r", ++count, i.data);
|
|
collisions_overwrite(&c, h(i.data, i.len), w, NULL);
|
|
test_for_best();
|
|
}
|
|
printf("\33[2K\r\n");
|
|
fprintf(stderr, "* Total collisions detected for %s: %zu\n", name,
|
|
words_count(&words) - collisions_count(&c));
|
|
fprintf(stderr, "* Final set utilization ratio (over 1024) %zu/%zu\n",
|
|
collisions_count(&c), collisions_capa(&c));
|
|
fprintf(stderr, "* Best set utilization ratio %zu/%zu\n", best_count,
|
|
best_capa);
|
|
collisions_free(&c);
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Finsing a mod64 inverse
|
|
See: https://lemire.me/blog/2017/09/18/computing-the-inverse-of-odd-integers/
|
|
***************************************************************************** */
|
|
|
|
/* will return `inv` if `inv` is inverse of `n` */
|
|
static uint64_t inverse64_test(uint64_t n, uint64_t inv) {
|
|
uint64_t result = inv * (2 - (n * inv));
|
|
return result;
|
|
}
|
|
|
|
static uint64_t inverse64(uint64_t x) {
|
|
uint64_t y = (3 * x) ^ 2;
|
|
y = inverse64_test(x, y);
|
|
y = inverse64_test(x, y);
|
|
y = inverse64_test(x, y);
|
|
y = inverse64_test(x, y);
|
|
if (FIO_LOG_LEVEL >= FIO_LOG_LEVEL_DEBUG) {
|
|
char buff[64];
|
|
fio_str_s t = FIO_STR_INIT;
|
|
fio_str_write(&t, "\n\t\tinverse for:\t", 16);
|
|
fio_str_write(&t, buff, fio_ltoa(buff, x, 16));
|
|
fio_str_write(&t, "\n\t\tis:\t\t\t", 8);
|
|
fio_str_write(&t, buff, fio_ltoa(buff, y, 16));
|
|
fio_str_write(&t, "\n\t\tsanity inverse test: 1==", 27);
|
|
fio_str_write_i(&t, x * y);
|
|
FIO_LOG_DEBUG("%s", fio_str_data(&t));
|
|
}
|
|
|
|
return y;
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Hash Breaking Word Workshop
|
|
***************************************************************************** */
|
|
|
|
/**
|
|
* Attacking 8 byte words, which follow this code path:
|
|
* h64 = seed + PRIME64_5;
|
|
* h64 += len; // len == 8
|
|
* if (p + 4 <= bEnd) {
|
|
* h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
|
|
* h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
|
|
* p += 4;
|
|
* }
|
|
*
|
|
* while (p < bEnd) {
|
|
* h64 ^= (*p) * PRIME64_5;
|
|
* h64 = XXH_rotl64(h64, 11) * PRIME64_1;
|
|
* p++;
|
|
* }
|
|
*
|
|
* h64 ^= h64 >> 33;
|
|
* h64 *= PRIME64_2;
|
|
* h64 ^= h64 >> 29;
|
|
* h64 *= PRIME64_3;
|
|
* h64 ^= h64 >> 32;
|
|
*/
|
|
|
|
FIO_FUNC void attack_xxhash2(void) {
|
|
/* POC - forcing XXHash to return seed only data (here, seed = 0) */
|
|
const uint64_t PRIME64_1 = 11400714785074694791ULL;
|
|
const uint64_t PRIME64_2 = 14029467366897019727ULL;
|
|
// const uint64_t PRIME64_3 = 1609587929392839161ULL;
|
|
// const uint64_t PRIME64_4 = 9650029242287828579ULL;
|
|
// const uint64_t PRIME64_5 = 2870177450012600261ULL;
|
|
const uint64_t PRIME64_1_INV = inverse64(PRIME64_1);
|
|
const uint64_t PRIME64_2_INV = inverse64(PRIME64_2);
|
|
// const uint64_t PRIME64_3_INV = inverse64(PRIME64_3);
|
|
// const uint64_t PRIME64_4_INV = inverse64(PRIME64_4);
|
|
// const uint64_t PRIME64_5_INV = inverse64(PRIME64_5);
|
|
const uint64_t seed_manipulation[4] = {PRIME64_1 + PRIME64_2, PRIME64_2, 0,
|
|
-PRIME64_1};
|
|
uint64_t v[4] = {0, 0, 0, 0};
|
|
/* attack v *= PRIME64_1 */
|
|
v[0] = v[0] * PRIME64_1_INV;
|
|
v[1] = v[1] * PRIME64_1_INV;
|
|
v[2] = v[2] * PRIME64_1_INV;
|
|
v[3] = v[3] * PRIME64_1_INV;
|
|
/* attack v = XXH_rotl64(v, 31) */
|
|
v[0] = (v[0] >> 31) | (v[0] << (64 - 31));
|
|
v[1] = (v[1] >> 31) | (v[1] << (64 - 31));
|
|
v[2] = (v[2] >> 31) | (v[2] << (64 - 31));
|
|
v[3] = (v[3] >> 31) | (v[3] << (64 - 31));
|
|
/* attack seed manipulation */
|
|
v[0] = v[0] - seed_manipulation[0];
|
|
v[1] = v[1] - seed_manipulation[1];
|
|
v[2] = v[2] - seed_manipulation[2];
|
|
v[3] = v[3] - seed_manipulation[3];
|
|
/* attack v += XXH_get64bits(p) * PRIME64_2 */
|
|
v[0] = v[0] * PRIME64_2_INV;
|
|
v[1] = v[1] * PRIME64_2_INV;
|
|
v[2] = v[2] * PRIME64_2_INV;
|
|
v[3] = v[3] * PRIME64_2_INV;
|
|
uint64_t seed_data = XXH64(v, 32, 0);
|
|
if (seed_data == 0)
|
|
fprintf(stderr, "XXHash seed data extracted for seed == 0!\n");
|
|
else
|
|
fprintf(stderr, "Seed extraction failed %llu\n", seed_data);
|
|
}
|
|
|
|
FIO_FUNC void attack_xxhash(void) {
|
|
/* POC - forcing XXHash to return seed only data (here, seed = 0) */
|
|
const uint64_t PRIME64_1 = 11400714785074694791ULL;
|
|
const uint64_t PRIME64_2 = 14029467366897019727ULL;
|
|
const uint64_t PRIME64_3 = 1609587929392839161ULL;
|
|
const uint64_t PRIME64_4 = 9650029242287828579ULL;
|
|
const uint64_t PRIME64_2_INV = 0x0BA79078168D4BAFULL;
|
|
const uint64_t seed_manipulation[4] = {PRIME64_1 + PRIME64_2, PRIME64_2, 0,
|
|
-PRIME64_1};
|
|
uint64_t v[4] = {0, 0, 0, 0};
|
|
/* attack seed manipulation */
|
|
v[0] = v[0] - seed_manipulation[0];
|
|
v[1] = v[1] - seed_manipulation[1];
|
|
v[2] = v[2] - seed_manipulation[2];
|
|
v[3] = v[3] - seed_manipulation[3];
|
|
/* attack v += XXH_get64bits(p) * PRIME64_2 */
|
|
v[0] = v[0] * PRIME64_2_INV;
|
|
v[1] = v[1] * PRIME64_2_INV;
|
|
v[2] = v[2] * PRIME64_2_INV;
|
|
v[3] = v[3] * PRIME64_2_INV;
|
|
|
|
uint64_t seed = 2870177450012600261ULL;
|
|
uint64_t expected_seed;
|
|
|
|
/* I didn't work out how to extract the seeed from this part */
|
|
expected_seed = fio_lrot(seed, 1) + fio_lrot(seed, 7) + fio_lrot(seed, 12) +
|
|
fio_lrot(seed, 18);
|
|
uint64_t tmp = seed * PRIME64_2;
|
|
tmp = fio_lrot(tmp, 31);
|
|
tmp *= PRIME64_1;
|
|
expected_seed ^= tmp;
|
|
expected_seed = expected_seed * PRIME64_1 + PRIME64_4;
|
|
expected_seed ^= tmp;
|
|
expected_seed = expected_seed * PRIME64_1 + PRIME64_4;
|
|
expected_seed ^= tmp;
|
|
expected_seed = expected_seed * PRIME64_1 + PRIME64_4;
|
|
expected_seed ^= tmp;
|
|
expected_seed = expected_seed * PRIME64_1 + PRIME64_4;
|
|
expected_seed += 32;
|
|
expected_seed ^= expected_seed >> 33;
|
|
expected_seed *= PRIME64_2;
|
|
expected_seed ^= expected_seed >> 29;
|
|
expected_seed *= PRIME64_3;
|
|
expected_seed ^= expected_seed >> 32;
|
|
|
|
uint64_t seed_data = XXH64(v, 32, 0);
|
|
if (seed_data == expected_seed)
|
|
fprintf(stderr, "XXHash extraxted seed data matches expectations!\n");
|
|
else
|
|
fprintf(stderr, "Seed extraction failed %llu\n", seed_data);
|
|
// char b[128] = {0};
|
|
// fio_ltoa(b, v[0], 16);
|
|
// fio_ltoa(b + 32, v[1], 16);
|
|
// fio_ltoa(b + 64, v[2], 16);
|
|
// fio_ltoa(b + 96, v[3], 16);
|
|
// fprintf(stderr, "Message was:\n%s\n%s\n%s\n%s\n", b, b + 32, b + 64, b +
|
|
// 96);
|
|
// Output (message):
|
|
// 0xFB9FE7DB392000B6
|
|
// 0xFFFFFFFFFFFFFFFF
|
|
// 0x0000000000000000
|
|
// 0x04601824C6DFFF49
|
|
}
|
|
|
|
/**
|
|
* Attacking 64 byte messages where the last 32 bytes are the same and the first
|
|
* 32 bytes use rotating 8 byte words. This is attcking the following part in
|
|
* the code:
|
|
*
|
|
* U64 v1 = seed + PRIME64_1 + PRIME64_2;
|
|
* U64 v2 = seed + PRIME64_2;
|
|
* U64 v3 = seed + 0;
|
|
* U64 v4 = seed - PRIME64_1;
|
|
*
|
|
* do {
|
|
* v1 += XXH_get64bits(p) * PRIME64_2;
|
|
* p += 8;
|
|
* v1 = XXH_rotl64(v1, 31);
|
|
* v1 *= PRIME64_1;
|
|
* //... v2, v3, v4 same;
|
|
* } while (p <= limit);
|
|
*
|
|
* h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) +
|
|
* XXH_rotl64(v4, 18);
|
|
*/
|
|
|
|
FIO_FUNC void add_bad4xxhash(void) {
|
|
attack_xxhash();
|
|
const uint64_t PRIME64_1 = 11400714785074694791ULL;
|
|
const uint64_t PRIME64_2 = 14029467366897019727ULL;
|
|
const uint64_t PRIME64_1_INV = inverse64(PRIME64_1);
|
|
const uint64_t PRIME64_2_INV = inverse64(PRIME64_2);
|
|
const uint64_t seed_manipulation[4] = {PRIME64_1 + PRIME64_2, PRIME64_2, 0,
|
|
-PRIME64_1};
|
|
|
|
uint64_t rotating[4] = {0x1, 0x20, 0x300, 0x4000};
|
|
uint8_t results[32][16] = {{0}};
|
|
uint8_t results_count = 0;
|
|
for (int i = 0; i < 4; ++i) {
|
|
for (int j = 0; j < 4; ++j) {
|
|
if (i == j) /* all 4 rotating words must be present */
|
|
continue;
|
|
/* mix rotating word order */
|
|
uint64_t v[4] = {rotating[i], rotating[j], rotating[3 - i],
|
|
rotating[3 - j]};
|
|
/* prepare vector against h64 = XXH_rotl64... */
|
|
v[0] = (v[0] >> 1) | (v[0] << (64 - 1));
|
|
v[1] = (v[1] >> 7) | (v[1] << (64 - 7));
|
|
v[2] = (v[2] >> 12) | (v[2] << (64 - 12));
|
|
v[3] = (v[3] >> 18) | (v[3] << (64 - 18));
|
|
/* attack v *= PRIME64_1 */
|
|
v[0] = v[0] * PRIME64_1_INV;
|
|
v[1] = v[1] * PRIME64_1_INV;
|
|
v[2] = v[2] * PRIME64_1_INV;
|
|
v[3] = v[3] * PRIME64_1_INV;
|
|
/* attack v = XXH_rotl64(v, 31) */
|
|
v[0] = (v[0] >> 31) | (v[0] << (64 - 31));
|
|
v[1] = (v[1] >> 31) | (v[1] << (64 - 31));
|
|
v[2] = (v[2] >> 31) | (v[2] << (64 - 31));
|
|
v[3] = (v[3] >> 31) | (v[3] << (64 - 31));
|
|
/* attack seed manipulation */
|
|
v[0] = v[0] - seed_manipulation[0];
|
|
v[1] = v[1] - seed_manipulation[1];
|
|
v[2] = v[2] - seed_manipulation[2];
|
|
v[3] = v[3] - seed_manipulation[3];
|
|
/* attack v += XXH_get64bits(p) * PRIME64_2 */
|
|
v[0] = v[0] * PRIME64_2_INV;
|
|
v[1] = v[1] * PRIME64_2_INV;
|
|
v[2] = v[2] * PRIME64_2_INV;
|
|
v[3] = v[3] * PRIME64_2_INV;
|
|
/* copy to results, if unique */
|
|
uint8_t unique = 1;
|
|
for (int t = 0; t < results_count; ++t) {
|
|
if (!memcmp(&results[0][t], v, 32))
|
|
unique = 0;
|
|
}
|
|
if (unique) {
|
|
memcpy(&results[0][results_count], v, 32);
|
|
++results_count;
|
|
}
|
|
}
|
|
}
|
|
if (results_count) {
|
|
fprintf(stderr, "Created %u vectors, now testing...\n", results_count);
|
|
uint64_t origin = XXH64(&results[0][0], 32, 0);
|
|
for (int i = 0; i < results_count; ++i) {
|
|
words_push(&words, FIO_STR_INIT_STATIC2(&results[0][i], 32));
|
|
if (i && origin == XXH64(&results[0][i], 32, 0))
|
|
fprintf(stderr, "Possible collision [%d]\n", i);
|
|
}
|
|
fprintf(stderr, "Done testing.\n");
|
|
}
|
|
}
|
|
|
|
FIO_FUNC void add_bad4risky(void) {}
|
|
|
|
FIO_FUNC void find_bit_collisions(hashing_func_fn fn, size_t collision_count,
|
|
uint8_t bit_count) {
|
|
words_s c = FIO_ARY_INIT;
|
|
const uint64_t mask = (1ULL << bit_count) - 1;
|
|
time_t start = clock();
|
|
while (words_count(&c) < collision_count) {
|
|
uint64_t rnd = fio_rand64();
|
|
if ((fn((char *)&rnd, 8) & mask) == mask) {
|
|
words_push(&c, FIO_STR_INIT_STATIC2((char *)&rnd, 8));
|
|
}
|
|
}
|
|
time_t end = clock();
|
|
char *name = hash_name(fn);
|
|
if (!name)
|
|
name = "unknown";
|
|
fprintf(stderr,
|
|
"* It took %zu cycles to find %zu (%u bit) collisions for %s (brute "
|
|
"fource):\n",
|
|
end - start, words_count(&c), bit_count, name);
|
|
FIO_ARY_FOR(&c, pos) {
|
|
uint64_t tmp = fio_str2u64(fio_str_data(pos));
|
|
fprintf(stderr, "* %p => %p\n", (void *)tmp,
|
|
(void *)fn(fio_str_data(pos), 8));
|
|
}
|
|
words_free(&c);
|
|
}
|
|
|
|
static void add_bad_words(void) {
|
|
if (!fio_cli_get("-t")) {
|
|
find_bit_collisions(risky, 16, 16);
|
|
find_bit_collisions(xxhash_test, 16, 16);
|
|
find_bit_collisions(siphash13, 16, 16);
|
|
find_bit_collisions(sha1, 16, 16);
|
|
}
|
|
add_bad4xxhash();
|
|
add_bad4risky();
|
|
}
|
|
|
|
/* *****************************************************************************
|
|
Hash experimentation workspace
|
|
***************************************************************************** */
|
|
/* Risky Hash consumption round, accepts a state word s and an input word w */
|
|
#define fio_risky_consume(s, w) \
|
|
(s) ^= (w); \
|
|
(s) = fio_lrot64((s), 33) + (w); \
|
|
(s) *= primes[0];
|
|
|
|
/* Computes a facil.io Risky Hash. */
|
|
static inline uintptr_t fio_risky_hash2(const void *data_, size_t len,
|
|
uint64_t seed) {
|
|
/* The primes used by Risky Hash */
|
|
const uint64_t primes[] = {
|
|
0xFBBA3FA15B22113B, // 1111101110111010001111111010000101011011001000100001000100111011
|
|
0xAB137439982B86C9, // 1010101100010011011101000011100110011000001010111000011011001001
|
|
};
|
|
/* The consumption vectors initialized state */
|
|
uint64_t v[4] = {
|
|
seed ^ primes[1],
|
|
~seed + primes[1],
|
|
fio_lrot64(seed, 17) ^ (primes[1] + primes[0]),
|
|
fio_lrot64(seed, 33) + (~primes[1]),
|
|
};
|
|
|
|
/* reading position */
|
|
const uint8_t *data = (uint8_t *)data_;
|
|
|
|
/* consume 256bit blocks */
|
|
for (size_t i = len >> 5; i; --i) {
|
|
fio_risky_consume(v[0], fio_str2u64(data));
|
|
fio_risky_consume(v[1], fio_str2u64(data + 8));
|
|
fio_risky_consume(v[2], fio_str2u64(data + 16));
|
|
fio_risky_consume(v[3], fio_str2u64(data + 24));
|
|
data += 32;
|
|
}
|
|
/* Consume any remaining 64 bit words. */
|
|
switch (len & 24) {
|
|
case 24:
|
|
fio_risky_consume(v[2], fio_str2u64(data + 16));
|
|
case 16: /* overflow */
|
|
fio_risky_consume(v[1], fio_str2u64(data + 8));
|
|
case 8: /* overflow */
|
|
fio_risky_consume(v[0], fio_str2u64(data));
|
|
data += len & 24;
|
|
}
|
|
|
|
uint64_t tmp = 0;
|
|
/* consume leftover bytes, if any */
|
|
switch ((len & 7)) {
|
|
case 7: /* overflow */
|
|
tmp |= ((uint64_t)data[6]) << 8;
|
|
case 6: /* overflow */
|
|
tmp |= ((uint64_t)data[5]) << 16;
|
|
case 5: /* overflow */
|
|
tmp |= ((uint64_t)data[4]) << 24;
|
|
case 4: /* overflow */
|
|
tmp |= ((uint64_t)data[3]) << 32;
|
|
case 3: /* overflow */
|
|
tmp |= ((uint64_t)data[2]) << 40;
|
|
case 2: /* overflow */
|
|
tmp |= ((uint64_t)data[1]) << 48;
|
|
case 1: /* overflow */
|
|
tmp |= ((uint64_t)data[0]) << 56;
|
|
/* ((len & 24) >> 3) is a 0-3 value representing the next state vector */
|
|
/* `switch` allows v[i] to be a register without a memory address */
|
|
/* using v[(len & 24) >> 3] forces implementation to use memory */
|
|
switch ((len & 24) >> 3) {
|
|
case 3:
|
|
fio_risky_consume(v[3], tmp);
|
|
break;
|
|
case 2:
|
|
fio_risky_consume(v[2], tmp);
|
|
break;
|
|
case 1:
|
|
fio_risky_consume(v[1], tmp);
|
|
break;
|
|
case 0:
|
|
fio_risky_consume(v[0], tmp);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* merge and mix */
|
|
uint64_t result = fio_lrot64(v[0], 17) + fio_lrot64(v[1], 13) +
|
|
fio_lrot64(v[2], 47) + fio_lrot64(v[3], 57);
|
|
result += len;
|
|
result += v[0] * primes[1];
|
|
result ^= fio_lrot64(result, 13);
|
|
result += v[1] * primes[1];
|
|
result ^= fio_lrot64(result, 29);
|
|
result += v[2] * primes[1];
|
|
result ^= fio_lrot64(result, 33);
|
|
result += v[3] * primes[1];
|
|
result ^= fio_lrot64(result, 51);
|
|
|
|
/* irreversible avalanche... I think */
|
|
result ^= (result >> 29) * primes[0];
|
|
return result;
|
|
}
|
|
|
|
#undef fio_risky_consume
|
|
|
|
inline FIO_FUNC uintptr_t fio_risky_hash_old(void *data_, size_t len,
|
|
uint64_t seed) {
|
|
/* inspired by xxHash: Yann Collet, Maciej Adamczyk... */
|
|
/* so I borrowed their primes as homage ;-) */
|
|
/* more primes at: https://asecuritysite.com/encryption/random3?val=64 */
|
|
const uint64_t primes[] = {
|
|
/* xxHash Primes */
|
|
14029467366897019727ULL, 11400714785074694791ULL, 1609587929392839161ULL,
|
|
9650029242287828579ULL, 2870177450012600261ULL,
|
|
};
|
|
/*
|
|
* 4 x 64 bit vectors for 256bit block consumption.
|
|
* When implementing a streaming variation, more fields might be required.
|
|
*/
|
|
struct risky_state_s {
|
|
uint64_t v[4];
|
|
} s = {{
|
|
(seed + primes[0] + primes[1]),
|
|
((~seed) + primes[0]),
|
|
((seed << 9) ^ primes[3]),
|
|
((seed >> 17) ^ primes[2]),
|
|
}};
|
|
|
|
/* A single data-consuming round, wrd is the data in big-endian 64 bit */
|
|
/* the design follows the xxHash basic round scheme and is easy to vectorize */
|
|
#define fio_risky_round_single(wrd, i) \
|
|
s.v[(i)] += (wrd)*primes[0]; \
|
|
s.v[(i)] = fio_lrot64(s.v[(i)], 33); \
|
|
s.v[(i)] *= primes[1];
|
|
|
|
/* an unrolled (vectorizable) 256bit round */
|
|
#define fio_risky_round_256(w0, w1, w2, w3) \
|
|
fio_risky_round_single(w0, 0); \
|
|
fio_risky_round_single(w1, 1); \
|
|
fio_risky_round_single(w2, 2); \
|
|
fio_risky_round_single(w3, 3);
|
|
|
|
uint8_t *data = (uint8_t *)data_;
|
|
|
|
/* loop over 256 bit "blocks" */
|
|
const size_t len_256 = len & (((size_t)-1) << 5);
|
|
for (size_t i = 0; i < len_256; i += 32) {
|
|
/* perform round for block */
|
|
fio_risky_round_256(fio_str2u64(data), fio_str2u64(data + 8),
|
|
fio_str2u64(data + 16), fio_str2u64(data + 24));
|
|
data += 32;
|
|
}
|
|
|
|
/* process last 64bit words in each vector */
|
|
switch (len & 24UL) {
|
|
case 24:
|
|
fio_risky_round_single(fio_str2u64(data), 0);
|
|
fio_risky_round_single(fio_str2u64(data + 8), 1);
|
|
fio_risky_round_single(fio_str2u64(data + 16), 2);
|
|
data += 24;
|
|
break;
|
|
case 16:
|
|
fio_risky_round_single(fio_str2u64(data), 0);
|
|
fio_risky_round_single(fio_str2u64(data + 8), 1);
|
|
data += 16;
|
|
break;
|
|
case 8:
|
|
fio_risky_round_single(fio_str2u64(data), 0);
|
|
data += 8;
|
|
break;
|
|
}
|
|
|
|
/* always process the last 64bits, if any, in the 4th vector */
|
|
uint64_t last_bytes = 0;
|
|
switch (len & 7) {
|
|
case 7:
|
|
last_bytes |= ((uint64_t)data[6] & 0xFF) << 56;
|
|
case 6: /* overflow */
|
|
last_bytes |= ((uint64_t)data[5] & 0xFF) << 48;
|
|
case 5: /* overflow */
|
|
last_bytes |= ((uint64_t)data[4] & 0xFF) << 40;
|
|
case 4: /* overflow */
|
|
last_bytes |= ((uint64_t)data[3] & 0xFF) << 32;
|
|
case 3: /* overflow */
|
|
last_bytes |= ((uint64_t)data[2] & 0xFF) << 24;
|
|
case 2: /* overflow */
|
|
last_bytes |= ((uint64_t)data[1] & 0xFF) << 16;
|
|
case 1: /* overflow */
|
|
last_bytes |= ((uint64_t)data[0] & 0xFF) << 8;
|
|
fio_risky_round_single(last_bytes, 3);
|
|
}
|
|
|
|
/* mix stage */
|
|
uint64_t result = (fio_lrot64(s.v[3], 63) + fio_lrot64(s.v[2], 57) +
|
|
fio_lrot64(s.v[1], 52) + fio_lrot64(s.v[0], 46));
|
|
result += len * primes[4];
|
|
result = ((result ^ s.v[0]) * primes[3]) + primes[2];
|
|
result = ((result ^ s.v[1]) * primes[3]) + primes[2];
|
|
result = ((result ^ s.v[2]) * primes[3]) + primes[2];
|
|
result = ((result ^ s.v[3]) * primes[3]) + primes[2];
|
|
/* avalanche */
|
|
result ^= (result >> 33);
|
|
result *= primes[1];
|
|
result ^= (result >> 29);
|
|
result *= primes[2];
|
|
return result;
|
|
|
|
#undef fio_risky_round_single
|
|
#undef fio_risky_round_256
|
|
}
|
|
|
|
#if TEST_XXHASH
|
|
#include "xxhash.c"
|
|
#endif
|