Skip to content

Commit

Permalink
Add include/adt/hash.h hash_ids and consolidate uses.
Browse files Browse the repository at this point in the history
- Add a function to hash a series of IDs, rather than doing it with
  a loop combining the intermediate hashes in inconsistent ways.

- Add a #define to log a probe count and a limit to assert, and
  rework the hashing code control flow for tracking/asserting on
  the probe count.

- Remove `hash_fnv1a_64` from a logging statement (normally not
  compiled), since that's the only remaining use remove the FNV1a
  functions from hash.h.

- Remove FSM_PHI_32 and FSM_PHI_64, now unused.

- parser.act still uses 32-bit FNV1a, but that's for character-by-
  character hashing, so that still makes sense, and it has its
  own copy of fnv1a.
  • Loading branch information
silentbicycle committed Jan 4, 2024
1 parent 78e0082 commit dfab0de
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 61 deletions.
39 changes: 7 additions & 32 deletions include/adt/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
#include <stdint.h>

#include "adt/common.h"
#include "fsm/fsm.h"

/* 32 and 64-bit approximations of the golden ratio. */
#define FSM_PHI_32 0x9e3779b9UL
#define FSM_PHI_64 (uint64_t)0x9e3779b97f4a7c15UL
#define HASH_LOG_PROBES 0
/* #define HASH_PROBE_LIMIT 100 */

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static __inline__ uint64_t
Expand All @@ -28,38 +28,13 @@ hash_id(uint64_t id)
return x * 2685821657736338717LLU;
}

/* FNV-1a hash function, 32 and 64 bit versions. This is in the public
* domain. For details, see:
*
* http://www.isthe.com/chongo/tech/comp/fnv/index.html
*/

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static __inline__ uint32_t
hash_fnv1a_32(const uint8_t *buf, size_t length)
{
#define FNV1a_32_OFFSET_BASIS 0x811c9dc5UL
#define FNV1a_32_PRIME 0x01000193UL
uint32_t h = FNV1a_32_OFFSET_BASIS;
size_t i;
for (i = 0; i < length; i++) {
h ^= buf[i];
h *= FNV1a_32_PRIME;
}
return h;
}

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static __inline__ uint64_t
hash_fnv1a_64(const uint8_t *buf, size_t length)
hash_ids(size_t count, const fsm_state_t *ids)
{
#define FNV1a_64_OFFSET_BASIS 0xcbf29ce484222325UL
#define FNV1a_64_PRIME 0x100000001b3UL
uint64_t h = FNV1a_64_OFFSET_BASIS;
size_t i;
for (i = 0; i < length; i++) {
h ^= buf[i];
h *= FNV1a_64_PRIME;
uint64_t h = 0;
for (size_t i = 0; i < count; i++) {
h = hash_id(h ^ ids[i]);
}
return h;
}
Expand Down
27 changes: 20 additions & 7 deletions src/adt/idmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,15 @@ idmap_set(struct idmap *m, fsm_state_t state_id,
}

const uint64_t mask = m->bucket_count - 1;
for (size_t b_i = 0; b_i < m->bucket_count; b_i++) {
int res = 0;
size_t b_i;
for (b_i = 0; b_i < m->bucket_count; b_i++) {
struct idmap_bucket *b = &m->buckets[(h + b_i) & mask];
if (b->state == state_id) {
assert(b->values != NULL);
u64bitset_set(b->values, value);
return 1;
res = 1;
goto done;
} else if (b->state == NO_STATE) {
b->state = state_id;
assert(b->values == NULL);
Expand All @@ -217,20 +220,30 @@ idmap_set(struct idmap *m, fsm_state_t state_id,
b->values = f_calloc(m->alloc,
vw, sizeof(b->values[0]));
if (b->values == NULL) {
return 0;
goto done;
}
m->buckets_used++;

u64bitset_set(b->values, value);
return 1;
res = 1;
goto done;
} else {
continue; /* collision */
}

}

assert(!"unreachable");
return 0;
done:
#if HASH_LOG_PROBES
fprintf(stderr, "%s: res %d after %zu probes (%u buckets)\n",
__func__, res, b_i, m->bucket_count);
#endif

#ifdef HASH_PROBE_LIMIT
assert(b_i < HASH_PROBE_LIMIT);
#endif
assert(b_i < m->bucket_count);

return res;
}

static const struct idmap_bucket *
Expand Down
18 changes: 9 additions & 9 deletions src/adt/internedstateset.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,10 @@ dump_tables(FILE *f, const struct interned_state_set_pool *pool)
#endif
}

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static uint64_t
hash_state_ids(size_t count, const fsm_state_t *ids)
{
uint64_t h = 0;
for (size_t i = 0; i < count; i++) {
h ^= hash_id(ids[i]);
}
return h;
return hash_ids(count, ids);
}

static bool
Expand Down Expand Up @@ -329,8 +324,13 @@ interned_state_set_intern_set(struct interned_state_set_pool *pool,
fprintf(stderr, "%s: htab[(0x%lx + %lu) & 0x%lx => %d\n",
__func__, h, b_i, mask, *b);
#endif

#ifdef HASH_PROBE_LIMIT
assert(probes < HASH_PROBE_LIMIT);
#endif

if (*b == NO_ID) {
#if LOG_ISS > 3
#if LOG_ISS > 3 || HASH_LOG_PROBES
fprintf(stderr, "%s: empty bucket (%zd probes)\n", __func__, probes);
#endif
dst_bucket = b;
Expand All @@ -351,7 +351,7 @@ interned_state_set_intern_set(struct interned_state_set_pool *pool,

if (0 == memcmp(states, buf, s->length * sizeof(buf[0]))) {
*result = id;
#if LOG_ISS > 3
#if LOG_ISS > 3 || HASH_LOG_PROBES
fprintf(stderr, "%s: reused %u (%zd probes)\n", __func__, id, probes);
#endif
return true;
Expand All @@ -362,7 +362,7 @@ interned_state_set_intern_set(struct interned_state_set_pool *pool,
}
assert(dst_bucket != NULL);

#if LOG_ISS > 3
#if LOG_ISS > 3 || HASH_LOG_PROBES
fprintf(stderr, "%s: miss after %zd probes\n", __func__, probes);
#else
(void)probes;
Expand Down
42 changes: 29 additions & 13 deletions src/libfsm/determinise.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ hash_iss(interned_state_set_id iss)
{
/* Just hashing the ID directly is fine here -- since they're
* interned, they're identified by pointer equality. */
return FSM_PHI_64 * (uintptr_t)iss;
return hash_id((uintptr_t)iss);
}

static struct mapping *
Expand Down Expand Up @@ -1309,15 +1309,10 @@ build_output_from_cached_analysis(struct analyze_closures_env *env, fsm_state_t

#define LOG_TO_SET_HTAB 0

SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW()
static uint64_t
to_set_hash(size_t count, const fsm_state_t *ids)
{
uint64_t h = hash_id(count);
for (size_t i = 0; i < count; i++) {
h ^= hash_id(ids[i]);
}
return h;
return hash_ids(count, ids);
}

static int
Expand All @@ -1333,15 +1328,14 @@ to_set_htab_check(struct analyze_closures_env *env,
return 0;
}


#if LOG_TO_SET_HTAB
#if LOG_TO_SET_HTAB || HASH_LOG_PROBES || defined(HASH_PROBE_LIMIT)
size_t probes = 0;
#endif
int res = 0;

const uint64_t mask = bcount - 1;
for (size_t b_i = 0; b_i < bcount; b_i++) {
#if LOG_TO_SET_HTAB
#if LOG_TO_SET_HTAB || HASH_LOG_PROBES || defined(HASH_PROBE_LIMIT)
probes++;
#endif
const struct to_set_bucket *b = &htab->buckets[(h + b_i) & mask];
Expand All @@ -1364,10 +1358,16 @@ to_set_htab_check(struct analyze_closures_env *env,
}

done:
#if LOG_TO_SET_HTAB
#if LOG_TO_SET_HTAB || HASH_LOG_PROBES
fprintf(stderr, "%s: result %d, %zu probes, htab: used %zu/%zu ceil\n",
__func__, res, probes, htab->buckets_used, htab->bucket_count);
#endif
#ifdef HASH_PROBE_LIMIT
if (probes >= HASH_PROBE_LIMIT) {
fprintf(stderr, "-- %zd probes, limit exceeded\n", probes);
}
assert(probes < HASH_PROBE_LIMIT);
#endif

return res;
}
Expand Down Expand Up @@ -1431,6 +1431,22 @@ to_set_htab_save(struct analyze_closures_env *env,
b->count = count;
b->offset = offset;
htab->buckets_used++;
#if HASH_LOG_PROBES
fprintf(stderr, "%s: [", __func__);
const fsm_state_t *ids = &env->to_sets.buf[offset];
for (size_t i = 0; i < count; i++) {
fprintf(stderr, "%s%d",
i > 0 ? " " : "", ids[i]);
}

fprintf(stderr, "] -> hash %lx -> b %zu (%zu/%zu used), %zd probes\n",
hash, (hash + b_i) & mask,
htab->buckets_used, htab->bucket_count, b_i);
#endif
#if HASH_PROBE_LIMIT
assert(b_i < HASH_PROBE_LIMIT);
#endif

return 1;
} else if (b->count == count) {
assert(b->offset != offset); /* no duplicates */
Expand Down Expand Up @@ -1586,8 +1602,8 @@ commit_buffered_result(struct analyze_closures_env *env, uint32_t *cache_result_
memcpy(&nr->entries[0], env->results.buffer.entries,
nr->count * sizeof(env->results.buffer.entries[0]));
#if LOG_GROUPING || LOG_COMMIT_BUFFERED_GROUP
fprintf(stderr, "%s: alloc %zu, hash 0x%016lx\n",
__func__, alloc_sz, hash_fnv1a_64((const uint8_t *)nr, alloc_sz));
fprintf(stderr, "%s: alloc %zu, count %u\n",
__func__, alloc_sz, nr->count));
#endif
}

Expand Down

0 comments on commit dfab0de

Please sign in to comment.