From 4493af865e411dd9288c1af40d3358543281e0d2 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sat, 6 Jan 2024 14:18:35 -0500 Subject: [PATCH] [hypertable] Update hypertable implementation to match updated local-hypertable implementation in cheetah. --- cilksan/cilksan_internal.h | 7 +- cilksan/hypertable.h | 343 +++++++++++++++++-------------------- cilksan/reducers.cpp | 12 +- 3 files changed, 169 insertions(+), 193 deletions(-) diff --git a/cilksan/cilksan_internal.h b/cilksan/cilksan_internal.h index 1ce074d..7f65d93 100644 --- a/cilksan/cilksan_internal.h +++ b/cilksan/cilksan_internal.h @@ -168,7 +168,7 @@ class CilkSanImpl_t { // Attempt to look up a view for a reducer. Returns a pointer to a view if it // exists and nullptr if not. void *reducer_lookup(hyper_table *reducer_views, uintptr_t key) const { - bucket *b = reducer_views->find(key); + hyper_table::bucket *b = reducer_views->find(key); if (b) { assert(key == b->key); return b->value.view; @@ -192,8 +192,9 @@ class CilkSanImpl_t { identity(new_view); // Insert the view into the table of reducer_views. - bucket new_bucket = {.key = (uintptr_t)key, - .value = {.view = new_view, .reduce_fn = reduce}}; + hyper_table::bucket new_bucket = { + .key = (uintptr_t)key, + .value = {.view = new_view, .reduce_fn = reduce}}; bool success = reducer_views->insert(new_bucket); assert(success && "create_reducer_view failed to insert new reducer."); (void)success; diff --git a/cilksan/hypertable.h b/cilksan/hypertable.h index 39be162..68270e5 100644 --- a/cilksan/hypertable.h +++ b/cilksan/hypertable.h @@ -15,22 +15,24 @@ static inline bool is_valid(uintptr_t key) { return !is_empty(key) && !is_tombstone(key); } -// An entry in the hash table. -struct bucket { - uintptr_t key = KEY_EMPTY; /* EMPTY, DELETED, or a user-provided pointer. */ - reducer_base value; - - void make_tombstone() { key = KEY_DELETED; } -}; - class CilkSanImpl_t; // Hash table of reducers. 
We don't need any locking or support for concurrent // updates, since the hypertable is local. class hyper_table { - +public: using index_t = uint32_t; + // An entry in the hash table. + struct bucket { + uintptr_t key = KEY_EMPTY; /* EMPTY, DELETED, or a user-provided pointer. */ + index_t hash; /* hash of the key when inserted into the table. */ + reducer_base value; + + void make_tombstone() { key = KEY_DELETED; } + }; + +private: // Data type for indexing the hash table. This type is used for hashes as // well as the table's capacity. static constexpr int32_t MIN_CAPACITY = 4; @@ -42,10 +44,12 @@ class hyper_table { static inline index_t hash(uintptr_t key_in) { uint64_t x = key_in ^ salt; - // mix64 from SplitMix. - x = (x ^ (x >> 33)) * 0xff51afd7ed558ccdUL; - x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53UL; - return x; + // mix, based on abseil's low-level hash, and convert 64-bit integers into + // 32-bit integers. + const size_t half_bits = sizeof(uintptr_t) * 4; + const uintptr_t low_mask = ((uintptr_t)(1) << half_bits) - 1; + uintptr_t v = (x & low_mask) * (x >> half_bits); + return (v & low_mask) ^ (v >> half_bits); } static inline index_t get_table_entry(int32_t capacity, uintptr_t key) { @@ -60,20 +64,105 @@ class hyper_table { return i; } - static inline bool continue_search(index_t tgt, index_t hash, - index_t init_hash) { + // For theoretical and practical efficiency, the hash table implements + // ordered linear probing --- consecutive hashes in the table are + // always stored in sorted order --- in a circular buffer. + // Intuitively, this ordering means any hash-table probe for a target + // T can stop when it encounters an element in the table whose hash is + // greater than T. + // + // Implementing ordered linear probing on a circular buffer, however, + // leads to several tricky cases when probing for an element or its + // insertion point. 
These cases depend on whether the probe or the + // run --- the ordered sequence of hashes in the table --- wraps + // around from the end to the beginning of the buffer's allocated + // memory. In general, there are four cases: + // + // Example case 1: No wrapping (common case) + // Index: ... | 3 | 4 | 5 | 6 | ... + // Hashes: ... | 3 | 3 | 3 | 5 | ... + // Target: 4 + // The probe starts at index 4 and scans increasing indices, stopping when + // it sees hash = 5 at index 6. + // + // Example case 2: Probe and run both wrap + // Index: | 0 | 1 | 2 | ... | 6 | 7 | + // Hashes: | 6 | 7 | 0 | ... | 6 | 6 | + // Target: 7 + // The run of 6's wraps around, as does the probe for 7. + // + // Example case 3: Probe does not wrap, run does wrap + // Index: | 0 | 1 | 2 | ... | 6 | 7 | + // Hashes: | 6 | 7 | 0 | ... | 6 | 6 | + // Target: 0 + // The run of 6's and 7's wrap around. The probe for 0 starts in the middle + // of this wrapped run and must continue past it, even though the hashes in + // the run are larger than the target. + // + // Example case 4: Probe wraps, run does not wrap + // Index: | 0 | 1 | 2 | ... | 6 | 7 | + // Hashes: | 6 | 0 | 1 | ... | 6 | 6 | + // Target: 7 + // After the wrapped run of 6's is a run starting at 0, which does not wrap. + // The probe for 7 wraps around before encountering the 0. The probe should + // stop at that point, even though 0 is smaller than 7. + // + // We characterize these four cases in terms of the following variables: + // + // - T: The target hash value being probed for. + // - i: The current index in the table being examined in the probe. + // - H[i]: The hash value of the key at index i, assuming that table entry is + // occupied. + // + // We can identify cases where the probe or the run wraps around the end of + // the circular buffer by comparing i to T (for the probe) and i to H[i] (for + // the run). A probe starts at i == T and proceeds to scan increasing values + // of i (mod table size). 
Therefore, we typically expect i >= T and i >= + // H[i]. But when wrapping occurs, i will be smaller than the hash, that is, + // i < T when the probe wraps and i < H[i] when the run wraps. + // + // We can describe these four cases in terms of these variables as follows: + // Normal Probe, Normal Run (NP+NR): T <= i and H[i] <= i + // The probe _terminates_ at i where T < H[i]. + // Wrapped Probe, Wrapped Run (WP+WR): T > i and H[i] > i + // The probe _terminates_ at i where T < H[i]. + // Normal Probe, Wrapped Run (NP+WR): T <= i and H[i] > i + // The probe _must continue_ even though T < H[i]. + // Wrapped Probe, Normal Run (WP+NR): T > i and H[i] <= i + // The probe _must terminate_ even though T > H[i]. + // + // The table uses the following bit trick to handle all of these cases simply: + // + // Continue the probe if and only if i-T <= i-H[i], using an _unsigned + // integer_ comparison. + // + // Intuitively, this trick makes the case of wrapping around the table + // coincide with unsigned integer overflow, allowing the same + // comparison to be used in all cases. + // + // We can justify this bit trick in all cases: + // + // NP+NR and WP+WR: The original termination condition, T < H[i], implies + // that -T > -H[i]. Adding i to both sides does not affect the comparison. + // + // NP+WR: The wrapped run, H[i] > i, implies that i-H[i] is negative, which + // becomes a large positive unsigned integer. Meanwhile, i-T is a small + // positive unsigned integer, because i >= T. Hence, i-T < i-H[i], which + // correctly implies that the probe must continue. + // + // WP+NR: The wrapped probe, T > i, implies that i-T is negative, which + // becomes a large positive unsigned integer. Meanwhile, i >= H[i], + // implying that i-H[i] is a small positive unsigned integer. Hence, i-T > + // i-H[i], which correctly implies that the probe should stop. 
+ // + // Note: One can formulate this bit trick as T-i >= H[i]-i instead, preserving + // the direction of the inequality. I formulate the trick this way simply + // because I prefer that the common case involve comparisons of small positive + // integers. + + static inline bool continue_probe(index_t tgt, index_t hash, index_t idx) { // NOTE: index_t must be unsigned for this check to work. - index_t norm_tgt = tgt - init_hash; - index_t norm_hash = hash - init_hash; - return norm_tgt >= norm_hash; - } - - static inline bool stop_insert_scan(index_t tgt, index_t hash, - index_t init_hash) { - // NOTE: index_t must be unsigned for this check to work. - index_t norm_tgt = tgt - init_hash; - index_t norm_hash = hash - init_hash; - return norm_tgt <= norm_hash; + return (idx - tgt) <= (idx - hash); } // Constant used to determine the target maximum load factor. The table will @@ -131,6 +220,8 @@ class hyper_table { buckets = bucket_array_create(new_capacity); capacity = new_capacity; occupancy = 0; + // Set count of insertions and removals to prevent insertions into + // new table from triggering another rebuild. ins_rm_count = -old_occupancy; for (int32_t i = 0; i < old_capacity; ++i) { @@ -164,69 +255,39 @@ class hyper_table { // Target hash index_t tgt = get_table_entry(capacity, key); bucket *buckets = this->buckets; - // Start the search at the target hash + // Start the probe at the target hash index_t i = tgt; - index_t init_hash = (index_t)(-1); do { uintptr_t curr_key = buckets[i].key; - // If we find the key, return that bucket. + // Found the key? Return that bucket. // TODO: Consider moving this bucket to the front of the run. if (key == curr_key) return &buckets[i]; - // If we find an empty entry, the search failed. + // Found an empty entry? The probe failed. if (is_empty(curr_key)) return nullptr; - // If we find a tombstone, continue the search. + // Found a tombstone? Continue the probe. 
if (is_tombstone(curr_key)) { i = inc_index(i, capacity); continue; } - // Otherwise we have another valid key that does not match. - // Record this hash for future search steps. - init_hash = get_table_entry(capacity, curr_key); - if ((tgt > i && i >= init_hash) || - (tgt < init_hash && ((tgt > i) == (init_hash > i)))) { - // The search will stop at init_hash anyway, so return early. - return nullptr; - } - break; - } while (i != tgt); - - do { - uintptr_t curr_key = buckets[i].key; - // If we find the key, return that bucket. - // TODO: Consider moving this bucket to the front of the run. - if (key == curr_key) - return &buckets[i]; - - // If we find an empty entry, the search failed. - if (is_empty(curr_key)) - return nullptr; + // Otherwise, buckets[i] is another valid key that does not match. + index_t curr_hash = buckets[i].hash; - // If we find a tombstone, continue the search. - if (is_tombstone(curr_key)) { + if (continue_probe(tgt, curr_hash, i)) { i = inc_index(i, capacity); continue; } - // Otherwise we have another valid key that does not match. - // Compare the hashes to decide whether or not to continue the - // search. - index_t curr_hash = get_table_entry(capacity, curr_key); - if (continue_search(tgt, curr_hash, init_hash)) { - i = inc_index(i, capacity); - continue; - } - - // If none of the above cases match, then the search failed to + // If none of the above cases match, then the probe failed to // find the key. return nullptr; } while (i != tgt); - // The search failed to find the key. + // The probe failed to find the key. return nullptr; } @@ -267,7 +328,7 @@ class hyper_table { } } - // The key is not aleady in the table. Append the bucket. + // The key is not already in the table. Append the bucket. 
buckets[occupancy] = b; ++this->occupancy; return true; @@ -288,7 +349,8 @@ class hyper_table { } // Target hash - index_t tgt = get_table_entry(capacity, b.key); + const index_t tgt = get_table_entry(capacity, b.key); + b.hash = tgt; // If we find an empty entry, insert the bucket there. if (is_empty(buckets[tgt].key)) { @@ -298,113 +360,20 @@ class hyper_table { return true; } - // Search for the place to insert b. + // Probe for the place to insert b. index_t i = tgt; - // Searching for an appropriate insertion point requires handling four - // conditions based on tgt --- the target index of the item being inserted - // --- i --- the current index in the hash table being examined in the - // search --- and hash --- the target index of the item at index i. - // - // Generally speaking, items that hash to the same index appear next to each - // other in the table, and items that hash to adjacent indices (modulo the - // table's capacity) appear next to each other in sorted order based on the - // indices they hash to. These invariants hold with the exception that - // tombstones can exist between items in the table that would otherwise be - // adjacent. Let a _run_ be a sequence of hash values for consecutive valid - // entries in the table (modulo the table's capacity). - // - // The search must accommodate the following 4 conditions: - // - Non-wrapped search (NS): tgt <= i - // - Wrapped search (WS): tgt > i - // - Non-wrapped run (NR): hash <= i - // - Wrapped run (WR): hash > i - // - // These conditions lead to 4 cases: - // - NS+NR: hash <= tgt <= i: - // Common case. Search terminates when hash > tgt. - // - WS+WR: i < hash <= tgt: - // Like NS+NR, search terminates when hash > tgt. - // - NS+WR: tgt <= i < hash: - // The search needs to treat tgt as larger than hash. 
Given init --- the - // hash of the first non-tombstone encountered --- comparing shifted - // values of tgt and hash --- specifically, X-init+2^k mod 2^k, where X - // \in {tgt, hash} --- causes tgt to become large and allows the search to - // terminate when shifted hash > shifted tgt. - // - WS+NR: hash <= i < tgt: - // The search needs to stop search before wrapping and treat hash as - // larger than tgt. - // Given init, computing on shifted values of tgt and hash --- i.e., - // X-init+2^k mod 2^k where X \in {tgt, hash} --- causes hash to become - // large and allows the search to terminate when shifted hash > shifted - // tgt. - - // Probe to find either a place to insert b or another valid entry in the - // hash table, whose hash is then stored in init_hash. - index_t init_hash = (index_t)(-1); - do { - uintptr_t curr_key = buckets[i].key; - // If we find the key, overwrite that bucket. - // TODO: Reconsider what we do in this case. - if (b.key == curr_key) { - buckets[i].value = b.value; - return true; - } - - // If we find an empty entry, insert b there. - if (is_empty(curr_key)) { - buckets[i] = b; - ++this->occupancy; - ++this->ins_rm_count; - return true; - } - - if (is_tombstone(curr_key)) { - // Check whether the next entry is valid. - index_t next_i = inc_index(i, capacity); - uintptr_t next_key = buckets[next_i].key; - if (is_valid(next_key)) { - // Record the hash of the first valid entry found and exit the loop. - init_hash = get_table_entry(capacity, next_key); - // Check if the search can be terminated early, either because - // init_hash == tgt, or we're in the WS+NR case, or we're terminating - // the search in the NS+NR or WS+WR cases. - if ((tgt == init_hash) || (tgt > next_i && next_i >= init_hash) || - (tgt < init_hash && ((tgt > next_i) == (init_hash > next_i)))) { - // The hash at the end of this run of tombstones would terminate the - // search. Because there are only tombstones between tgt and - // next_i, inserting b at tgt is safe. 
- buckets[tgt] = b; - ++this->occupancy; - ++this->ins_rm_count; - return true; - } - break; - } - // We found a tombstone followed by an invalid entry (tombstone or - // empty). Continue searching. - i = next_i; - continue; - } - - // Record the hash of the first valid entry found and exit the loop. - init_hash = get_table_entry(capacity, curr_key); - break; - - } while (i != tgt); - assert(init_hash != (index_t)(-1)); - - // Use init_hash to continue probing to find a place to insert b. + const index_t probe_end = tgt; do { uintptr_t curr_key = buckets[i].key; - // If we find the key, overwrite that bucket. - // TODO: Reconsider what we do in this case. + // Found the key? Overwrite that bucket. + // TODO: Reconsider what to do in this case. if (b.key == curr_key) { buckets[i].value = b.value; return true; } - // If we find an empty entry, insert b there. + // Found an empty entry? Insert b there. if (is_empty(curr_key)) { buckets[i] = b; ++this->occupancy; @@ -412,52 +381,58 @@ class hyper_table { return true; } - // If we find a tombstone, check whether to insert b here, and finish the - // insert if so. + // Found a tombstone? if (is_tombstone(curr_key)) { index_t current_tomb = i; - // Scan all consecutive tombstones from i. + // Scan consecutive tombstones from i. index_t next_i = inc_index(i, capacity); uintptr_t tomb_end = buckets[next_i].key; - while (is_tombstone(tomb_end)) { + while (next_i != probe_end && is_tombstone(tomb_end)) { next_i = inc_index(next_i, capacity); tomb_end = buckets[next_i].key; } - // If the next entry is empty, then the search would stop. It's safe to - // insert the bucket at the tombstone. + + // If the next entry is empty, then the probe would stop. It's + // safe to insert the bucket at the tombstone at i. if (is_empty(tomb_end)) { buckets[current_tomb] = b; ++this->occupancy; ++this->ins_rm_count; return true; } - // Check if the hash of the element at the end of this run of tombstones - // would terminate the search. 
- index_t tomb_end_hash = get_table_entry(capacity, tomb_end); - if (stop_insert_scan(tgt, tomb_end_hash, init_hash)) { - // It's safe to insert the element at the current tombstone. + + // Check if the hash at the end of this run of tombstones would + // terminate the probe or if the probe has traversed the whole + // table. + index_t tomb_end_hash = buckets[next_i].hash; + if (next_i == probe_end || + !continue_probe(tgt, tomb_end_hash, next_i)) { + // It's safe to insert b at the current tombstone. buckets[current_tomb] = b; ++this->occupancy; ++this->ins_rm_count; return true; } + // None of the locations among these consecutive tombstones are - // appropriate for this bucket. Continue the search. - i = inc_index(next_i, capacity); + // appropriate for this bucket. Continue the probe. + i = next_i; continue; } - // Otherwise we have another valid key that does not match. Compare the - // hashes to decide whether or not to continue the search. - index_t curr_hash = get_table_entry(capacity, curr_key); - if (continue_search(tgt, curr_hash, init_hash)) { + // Otherwise this entry contains another valid key that does + // not match. Compare the hashes to decide whether or not to + // continue the probe. + index_t curr_hash = buckets[i].hash; + if (continue_probe(tgt, curr_hash, i)) { i = inc_index(i, capacity); continue; } - // This is an appropriate location to insert the bucket. Stop the search. + // This is an appropriate location to insert the bucket. Stop + // the probe. break; - } while (i != tgt); + } while (i != probe_end); index_t insert_tgt = i; // The search found a place to insert the bucket, but it's occupied. 
Insert diff --git a/cilksan/reducers.cpp b/cilksan/reducers.cpp index 4cb1d11..3516940 100644 --- a/cilksan/reducers.cpp +++ b/cilksan/reducers.cpp @@ -15,7 +15,7 @@ static void reducer_register(const csi_id_t call_id, unsigned MAAP_count, if (CilkSanImpl.stealable()) { hyper_table *reducer_views = CilkSanImpl.get_or_create_reducer_views(); - reducer_views->insert((bucket){ + reducer_views->insert((hyper_table::bucket){ .key = (uintptr_t)key, .value = {.view = key, .reduce_fn = (__cilk_reduce_fn)reduce_ptr}}); } @@ -139,9 +139,9 @@ void CilkSanImpl_t::reduce_local_views() { // Reduce every reducer view in the table with its leftmost view. int32_t capacity = reducer_views->capacity; - bucket *buckets = reducer_views->buckets; + hyper_table::bucket *buckets = reducer_views->buckets; for (int32_t i = 0; i < capacity; ++i) { - bucket b = buckets[i]; + hyper_table::bucket b = buckets[i]; if (!is_valid(b.key)) continue; @@ -199,16 +199,16 @@ hyper_table::merge_two_hyper_tables(CilkSanImpl_t *__restrict__ tool, int32_t src_capacity = (src->capacity < MIN_HT_CAPACITY) ? src->occupancy : src->capacity; - bucket *src_buckets = src->buckets; + hyper_table::bucket *src_buckets = src->buckets; // Iterate over the contents of the source hyper_table. for (int32_t i = 0; i < src_capacity; ++i) { - struct bucket b = src_buckets[i]; + hyper_table::bucket b = src_buckets[i]; if (!is_valid(b.key)) continue; // For each valid key in the source table, lookup that key in the // destination table. - bucket *dst_bucket = dst->find(b.key); + hyper_table::bucket *dst_bucket = dst->find(b.key); if (nullptr == dst_bucket) { // The destination table does not contain this key. Insert the