From 7e9b517e6d68872d56dcaaf31ab37f1bdfa39360 Mon Sep 17 00:00:00 2001
From: Scott Vokes
Date: Wed, 3 Jan 2024 14:39:00 -0500
Subject: [PATCH] hash_id: Change to xorshift*.

katef's testing with words.sh found some suspicious timing; profiling
with callgrind showed there's still some kind of bad collision behavior
doing PHI64(a) ^ PHI64(b) with exactly two IDs. It's probably still a
bad idea to combine multiple Fibonacci hashes, even with xor-ing rather
than adding.

Changing to xorshift* (another fast, high-quality hash function for
64-bit ints) immediately makes the issue go away, so do that.
---
 include/adt/hash.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/adt/hash.h b/include/adt/hash.h
index d4ed1227d..74ee4a890 100644
--- a/include/adt/hash.h
+++ b/include/adt/hash.h
@@ -15,14 +15,17 @@
 #define FSM_PHI_32 0x9e3779b9UL
 #define FSM_PHI_64 (uint64_t)0x9e3779b97f4a7c15UL
 
-/* A suitable hash function for individual sequentially allocated
- * identifiers. See Knuth 6.4, Fibonacci hashing. */
-
 SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
 static __inline__ uint64_t
 hash_id(unsigned id)
 {
-	return FSM_PHI_64 * (uint64_t)(id + (unsigned)1);
+	/* xorshift* A1(12,25,27),
+	 * from http://vigna.di.unimi.it/ftp/papers/xorshift.pdf */
+	uint64_t x = id + 1;
+	x ^= x >> 12; // a
+	x ^= x << 25; // b
+	x ^= x >> 27; // c
+	return x * 2685821657736338717LLU;
 }
 
 /* FNV-1a hash function, 32 and 64 bit versions. This is in the public