Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further hash function cleanup. #456

Merged
merged 6 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 8 additions & 33 deletions include/adt/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
#include <stdint.h>

#include "adt/common.h"
#include "fsm/fsm.h"

/* 32 and 64-bit approximations of the golden ratio. */
#define FSM_PHI_32 0x9e3779b9UL
#define FSM_PHI_64 (uint64_t)0x9e3779b97f4a7c15UL
#define HASH_LOG_PROBES 0
/* #define HASH_PROBE_LIMIT 100 */

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static __inline__ uint64_t
hash_id(unsigned id)
hash_id(uint64_t id)
{
/* xorshift* A1(12,25,27),
* from http://vigna.di.unimi.it/ftp/papers/xorshift.pdf */
Expand All @@ -28,38 +28,13 @@ hash_id(unsigned id)
return x * 2685821657736338717LLU;
}

/* FNV-1a hash function, 32 and 64 bit versions. This is in the public
* domain. For details, see:
*
* http://www.isthe.com/chongo/tech/comp/fnv/index.html
*/

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static __inline__ uint32_t
hash_fnv1a_32(const uint8_t *buf, size_t length)
{
#define FNV1a_32_OFFSET_BASIS 0x811c9dc5UL
#define FNV1a_32_PRIME 0x01000193UL
uint32_t h = FNV1a_32_OFFSET_BASIS;
size_t i;
for (i = 0; i < length; i++) {
h ^= buf[i];
h *= FNV1a_32_PRIME;
}
return h;
}

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static __inline__ uint64_t
hash_fnv1a_64(const uint8_t *buf, size_t length)
hash_ids(size_t count, const fsm_state_t *ids)
{
#define FNV1a_64_OFFSET_BASIS 0xcbf29ce484222325UL
#define FNV1a_64_PRIME 0x100000001b3UL
uint64_t h = FNV1a_64_OFFSET_BASIS;
size_t i;
for (i = 0; i < length; i++) {
h ^= buf[i];
h *= FNV1a_64_PRIME;
uint64_t h = 0;
for (size_t i = 0; i < count; i++) {
h = hash_id(h ^ ids[i]);
}
return h;
}
Expand Down
3 changes: 0 additions & 3 deletions include/adt/stateset.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,5 @@ state_set_rebase(struct state_set **set, fsm_state_t base);
void
state_set_replace(struct state_set **set, fsm_state_t old, fsm_state_t new);

unsigned long
state_set_hash(const struct state_set *set);

#endif

27 changes: 20 additions & 7 deletions src/adt/idmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,15 @@ idmap_set(struct idmap *m, fsm_state_t state_id,
}

const uint64_t mask = m->bucket_count - 1;
for (size_t b_i = 0; b_i < m->bucket_count; b_i++) {
int res = 0;
size_t b_i;
for (b_i = 0; b_i < m->bucket_count; b_i++) {
struct idmap_bucket *b = &m->buckets[(h + b_i) & mask];
if (b->state == state_id) {
assert(b->values != NULL);
u64bitset_set(b->values, value);
return 1;
res = 1;
goto done;
} else if (b->state == NO_STATE) {
b->state = state_id;
assert(b->values == NULL);
Expand All @@ -217,20 +220,30 @@ idmap_set(struct idmap *m, fsm_state_t state_id,
b->values = f_calloc(m->alloc,
vw, sizeof(b->values[0]));
if (b->values == NULL) {
return 0;
goto done;
}
m->buckets_used++;

u64bitset_set(b->values, value);
return 1;
res = 1;
goto done;
} else {
continue; /* collision */
}

}

assert(!"unreachable");
return 0;
done:
#if HASH_LOG_PROBES
fprintf(stderr, "%s: res %d after %zu probes (%u buckets)\n",
__func__, res, b_i, m->bucket_count);
#endif

#ifdef HASH_PROBE_LIMIT
assert(b_i < HASH_PROBE_LIMIT);
#endif
assert(b_i < m->bucket_count);

return res;
}

static const struct idmap_bucket *
Expand Down
18 changes: 9 additions & 9 deletions src/adt/internedstateset.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,10 @@ dump_tables(FILE *f, const struct interned_state_set_pool *pool)
#endif
}

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static uint64_t
hash_state_ids(size_t count, const fsm_state_t *ids)
{
uint64_t h = 0;
for (size_t i = 0; i < count; i++) {
h ^= hash_id(ids[i]);
}
return h;
return hash_ids(count, ids);
}

static bool
Expand Down Expand Up @@ -329,8 +324,13 @@ interned_state_set_intern_set(struct interned_state_set_pool *pool,
fprintf(stderr, "%s: htab[(0x%lx + %lu) & 0x%lx => %d\n",
__func__, h, b_i, mask, *b);
#endif

#ifdef HASH_PROBE_LIMIT
assert(probes < HASH_PROBE_LIMIT);
#endif

if (*b == NO_ID) {
#if LOG_ISS > 3
#if LOG_ISS > 3 || HASH_LOG_PROBES
fprintf(stderr, "%s: empty bucket (%zd probes)\n", __func__, probes);
#endif
dst_bucket = b;
Expand All @@ -351,7 +351,7 @@ interned_state_set_intern_set(struct interned_state_set_pool *pool,

if (0 == memcmp(states, buf, s->length * sizeof(buf[0]))) {
*result = id;
#if LOG_ISS > 3
#if LOG_ISS > 3 || HASH_LOG_PROBES
fprintf(stderr, "%s: reused %u (%zd probes)\n", __func__, id, probes);
#endif
return true;
Expand All @@ -362,7 +362,7 @@ interned_state_set_intern_set(struct interned_state_set_pool *pool,
}
assert(dst_bucket != NULL);

#if LOG_ISS > 3
#if LOG_ISS > 3 || HASH_LOG_PROBES
fprintf(stderr, "%s: miss after %zd probes\n", __func__, probes);
#else
(void)probes;
Expand Down
16 changes: 0 additions & 16 deletions src/adt/stateset.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,19 +668,3 @@ state_set_replace(struct state_set **setp, fsm_state_t old, fsm_state_t new)
}
}
}

unsigned long
state_set_hash(const struct state_set *set)
{
if (set == NULL) {
return 0; /* empty */
}

if (IS_SINGLETON(set)) {
fsm_state_t state;
state = SINGLETON_DECODE(set);
return hashrec(&state, sizeof state);
}

return hashrec(set->a, set->i * sizeof *set->a);
}
55 changes: 32 additions & 23 deletions src/libfsm/determinise.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ hash_iss(interned_state_set_id iss)
{
/* Just hashing the ID directly is fine here -- since they're
* interned, they're identified by pointer equality. */
return FSM_PHI_64 * (uintptr_t)iss;
return hash_id((uintptr_t)iss);
}

static struct mapping *
Expand Down Expand Up @@ -1309,15 +1309,10 @@ build_output_from_cached_analysis(struct analyze_closures_env *env, fsm_state_t

#define LOG_TO_SET_HTAB 0

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static uint64_t
to_set_hash(size_t count, const fsm_state_t *ids)
{
uint64_t h = hash_id(count);
for (size_t i = 0; i < count; i++) {
h ^= hash_id(ids[i]);
}
return h;
return hash_ids(count, ids);
}

static int
Expand All @@ -1333,15 +1328,14 @@ to_set_htab_check(struct analyze_closures_env *env,
return 0;
}


#if LOG_TO_SET_HTAB
#if LOG_TO_SET_HTAB || HASH_LOG_PROBES || defined(HASH_PROBE_LIMIT)
size_t probes = 0;
#endif
int res = 0;

const uint64_t mask = bcount - 1;
for (size_t b_i = 0; b_i < bcount; b_i++) {
#if LOG_TO_SET_HTAB
#if LOG_TO_SET_HTAB || HASH_LOG_PROBES || defined(HASH_PROBE_LIMIT)
probes++;
#endif
const struct to_set_bucket *b = &htab->buckets[(h + b_i) & mask];
Expand All @@ -1364,10 +1358,16 @@ to_set_htab_check(struct analyze_closures_env *env,
}

done:
#if LOG_TO_SET_HTAB
#if LOG_TO_SET_HTAB || HASH_LOG_PROBES
fprintf(stderr, "%s: result %d, %zu probes, htab: used %zu/%zu ceil\n",
__func__, res, probes, htab->buckets_used, htab->bucket_count);
#endif
#ifdef HASH_PROBE_LIMIT
if (probes >= HASH_PROBE_LIMIT) {
fprintf(stderr, "-- %zd probes, limit exceeded\n", probes);
}
assert(probes < HASH_PROBE_LIMIT);
#endif

return res;
}
Expand Down Expand Up @@ -1431,6 +1431,22 @@ to_set_htab_save(struct analyze_closures_env *env,
b->count = count;
b->offset = offset;
htab->buckets_used++;
#if HASH_LOG_PROBES
fprintf(stderr, "%s: [", __func__);
const fsm_state_t *ids = &env->to_sets.buf[offset];
for (size_t i = 0; i < count; i++) {
fprintf(stderr, "%s%d",
i > 0 ? " " : "", ids[i]);
}

fprintf(stderr, "] -> hash %lx -> b %zu (%zu/%zu used), %zd probes\n",
hash, (hash + b_i) & mask,
htab->buckets_used, htab->bucket_count, b_i);
#endif
#if HASH_PROBE_LIMIT
assert(b_i < HASH_PROBE_LIMIT);
#endif

return 1;
} else if (b->count == count) {
assert(b->offset != offset); /* no duplicates */
Expand Down Expand Up @@ -1586,8 +1602,8 @@ commit_buffered_result(struct analyze_closures_env *env, uint32_t *cache_result_
memcpy(&nr->entries[0], env->results.buffer.entries,
nr->count * sizeof(env->results.buffer.entries[0]));
#if LOG_GROUPING || LOG_COMMIT_BUFFERED_GROUP
fprintf(stderr, "%s: alloc %zu, hash 0x%016lx\n",
__func__, alloc_sz, hash_fnv1a_64((const uint8_t *)nr, alloc_sz));
fprintf(stderr, "%s: alloc %zu, count %u\n",
__func__, alloc_sz, nr->count));
#endif
}

Expand Down Expand Up @@ -1645,16 +1661,9 @@ hash_pair(fsm_state_t a, fsm_state_t b)
assert(b & RESULT_BIT);
a &=~ RESULT_BIT;
b &=~ RESULT_BIT;

/* Don't hash a and b separately and combine them with
* hash_id, because it's common to have adjacent pairs of
* result IDs, and with how hash_id works that leads to
* multiples of similar hash values bunching up.
*
* This could be replaced with a better hash function later,
* but use LOG_CACHE_HTAB to ensure there aren't visually obvious
* runs of collisions appearing in the tables. */
const uint64_t res = hash_id(a + b);
assert(a != b);
const uint64_t ab = ((uint64_t)a << 32) | (uint64_t)b;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a + b is commutative, this shifting isn't, I'm going to double-check that the ordering is consistent here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 9cba560. Thanks @dgryski for catching this.

const uint64_t res = hash_id(ab);
/* fprintf(stderr, "%s: a %d, b %d -> %016lx\n", __func__, a, b, res); */
return res;
}
Expand Down