From 4493af865e411dd9288c1af40d3358543281e0d2 Mon Sep 17 00:00:00 2001
From: TB Schardl <neboat@mit.edu>
Date: Sat, 6 Jan 2024 14:18:35 -0500
Subject: [PATCH] [hypertable] Update hypertable implementation to match
 updated local-hypertable implementation in cheetah.

---
 cilksan/cilksan_internal.h |   7 +-
 cilksan/hypertable.h       | 343 +++++++++++++++++--------------------
 cilksan/reducers.cpp       |  12 +-
 3 files changed, 169 insertions(+), 193 deletions(-)

diff --git a/cilksan/cilksan_internal.h b/cilksan/cilksan_internal.h
index 1ce074d..7f65d93 100644
--- a/cilksan/cilksan_internal.h
+++ b/cilksan/cilksan_internal.h
@@ -168,7 +168,7 @@ class CilkSanImpl_t {
   // Attempt to look up a view for a reducer.  Returns a pointer to a view if it
   // exists and nullptr if not.
   void *reducer_lookup(hyper_table *reducer_views, uintptr_t key) const {
-    bucket *b = reducer_views->find(key);
+    hyper_table::bucket *b = reducer_views->find(key);
     if (b) {
       assert(key == b->key);
       return b->value.view;
@@ -192,8 +192,9 @@ class CilkSanImpl_t {
     identity(new_view);
 
     // Insert the view into the table of reducer_views.
-    bucket new_bucket = {.key = (uintptr_t)key,
-                         .value = {.view = new_view, .reduce_fn = reduce}};
+    hyper_table::bucket new_bucket = {
+        .key = (uintptr_t)key,
+        .value = {.view = new_view, .reduce_fn = reduce}};
     bool success = reducer_views->insert(new_bucket);
     assert(success && "create_reducer_view failed to insert new reducer.");
     (void)success;
diff --git a/cilksan/hypertable.h b/cilksan/hypertable.h
index 39be162..68270e5 100644
--- a/cilksan/hypertable.h
+++ b/cilksan/hypertable.h
@@ -15,22 +15,24 @@ static inline bool is_valid(uintptr_t key) {
   return !is_empty(key) && !is_tombstone(key);
 }
 
-// An entry in the hash table.
-struct bucket {
-  uintptr_t key = KEY_EMPTY; /* EMPTY, DELETED, or a user-provided pointer. */
-  reducer_base value;
-
-  void make_tombstone() { key = KEY_DELETED; }
-};
-
 class CilkSanImpl_t;
 
 // Hash table of reducers.  We don't need any locking or support for concurrent
 // updates, since the hypertable is local.
 class hyper_table {
-
+public:
   using index_t = uint32_t;
 
+  // An entry in the hash table.
+  struct bucket {
+    uintptr_t key = KEY_EMPTY; /* EMPTY, DELETED, or a user-provided pointer. */
+    index_t hash; /* hash of the key when inserted into the table. */
+    reducer_base value;
+
+    void make_tombstone() { key = KEY_DELETED; }
+  };
+
+private:
   // Data type for indexing the hash table.  This type is used for hashes as
   // well as the table's capacity.
   static constexpr int32_t MIN_CAPACITY = 4;
@@ -42,10 +44,12 @@ class hyper_table {
 
   static inline index_t hash(uintptr_t key_in) {
     uint64_t x = key_in ^ salt;
-    // mix64 from SplitMix.
-    x = (x ^ (x >> 33)) * 0xff51afd7ed558ccdUL;
-    x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53UL;
-    return x;
+    // mix, based on abseil's low-level hash, and convert 64-bit integers into
+    // 32-bit integers.
+    const size_t half_bits = sizeof(uintptr_t) * 4;
+    const uintptr_t low_mask = ((uintptr_t)(1) << half_bits) - 1;
+    uintptr_t v = (x & low_mask) * (x >> half_bits);
+    return (v & low_mask) ^ (v >> half_bits);
   }
 
   static inline index_t get_table_entry(int32_t capacity, uintptr_t key) {
@@ -60,20 +64,105 @@ class hyper_table {
     return i;
   }
 
-  static inline bool continue_search(index_t tgt, index_t hash,
-                                     index_t init_hash) {
+  // For theoretical and practical efficiency, the hash table implements
+  // ordered linear probing --- consecutive hashes in the table are
+  // always stored in sorted order --- in a circular buffer.
+  // Intuitively, this ordering means any hash-table probe for a target
+  // T can stop when it encounters an element in the table whose hash is
+  // greater than T.
+  //
+  // Implementing ordered linear probing on a circular buffer, however,
+  // leads to several tricky cases when probing for an element or its
+  // insertion point.  These cases depend on whether the probe or the
+  // run --- the ordered sequence of hashes in the table --- wraps
+  // around from the end to the beginning of the buffer's allocated
+  // memory.  In general, there are four cases:
+  //
+  // Example case 1: No wrapping (common case)
+  //     Index:  ... | 3 | 4 | 5 | 6 | ...
+  //     Hashes: ... | 3 | 3 | 3 | 5 | ...
+  //     Target: 4
+  //   The probe starts at index 4 and scans increasing indices, stopping when
+  //   it sees hash = 5 at index 6.
+  //
+  // Example case 2: Probe and run both wrap
+  //     Index:  | 0 | 1 | 2 | ... | 6 | 7 |
+  //     Hashes: | 6 | 7 | 0 | ... | 6 | 6 |
+  //     Target: 7
+  //   The run of 6's wraps around, as does the probe for 7.
+  //
+  // Example case 3: Probe does not wrap, run does wrap
+  //     Index:  | 0 | 1 | 2 | ... | 6 | 7 |
+  //     Hashes: | 6 | 7 | 0 | ... | 6 | 6 |
+  //     Target: 0
+  //   The run of 6's and 7's wrap around.  The probe for 0 starts in the middle
+  //   of this wrapped run and must continue past it, even though the hashes in
+  //   the run are larger than the target.
+  //
+  // Example case 4: Probe wraps, run does not wrap
+  //     Index:  | 0 | 1 | 2 | ... | 6 | 7 |
+  //     Hashes: | 6 | 0 | 1 | ... | 6 | 6 |
+  //     Target: 7
+  //   After the wrapped run of 6's is a run starting at 0, which does not wrap.
+  //   The probe for 7 wraps around before encountering the 0.  The probe should
+  //   stop at that point, even though 0 is smaller than 7.
+  //
+  // We characterize these four cases in terms of the following variables:
+  //
+  // - T: The target hash value being probed for.
+  // - i: The current index in the table being examined in the probe.
+  // - H[i]: The hash value of the key at index i, assuming that table entry is
+  //   occupied.
+  //
+  // We can identify cases where the probe or the run wraps around the end of
+  // the circular buffer by comparing i to T (for the probe) and i to H[i] (for
+  // the run).  A probe starts at i == T and proceeds to scan increasing values
+  // of i (mod table size).  Therefore, we typically expect i >= T and i >=
+  // H[i].  But when wrapping occurs, i will be smaller than the hash, that is,
+  // i < T when the probe wraps and i < H[i] when the run wraps.
+  //
+  // We can describe these four cases in terms of these variables as follows:
+  //   Normal Probe, Normal Run (NP+NR):   T <= i and H[i] <= i
+  //     The probe _terminates_ at i where T < H[i].
+  //   Wrapped Probe, Wrapped Run (WP+WR): T > i and H[i] > i
+  //     The probe _terminates_ at i where T < H[i].
+  //   Normal Probe, Wrapped Run (NP+WR):  T <= i and H[i] > i
+  //     The probe _must continue_ even though T < H[i].
+  //   Wrapped Probe, Normal Run (WP+NR):  T > i and H[i] <= i
+  //     The probe _must terminate_ even though T > H[i].
+  //
+  // The table uses the following bit trick to handle all of these cases simply:
+  //
+  //   Continue the probe if and only if i-T <= i-H[i], using an _unsigned
+  //   integer_ comparison.
+  //
+  // Intuitively, this trick makes the case of wrapping around the table
+  // coincide with unsigned integer overflow, allowing the same
+  // comparison to be used in all cases.
+  //
+  // We can justify this bit trick in all cases:
+  //
+  //   NP+NR and WP+WR: The termination condition, T < H[i], implies -T > -H[i].
+  //   Adding i preserves this, since i-T and i-H[i] both wrap or both do not.
+  //
+  //   NP+WR: The wrapped run, H[i] > i, implies that i-H[i] is negative, which
+  //   becomes a large positive unsigned integer.  Meanwhile, i-T is a small
+  //   nonnegative unsigned integer, because i >= T.  Hence, i-T < i-H[i], which
+  //   correctly implies that the probe must continue.
+  //
+  //   WP+NR: The wrapped probe, T > i, implies that i-T is negative, which
+  //   becomes a large positive unsigned integer.  Meanwhile, i >= H[i],
+  //   implying that i-H[i] is a small positive unsigned integer.  Hence, i-T >
+  //   i-H[i], which correctly implies that the probe should stop.
+  //
+  // Note: One can formulate this bit trick as T-i >= H[i]-i instead, preserving
+  // the direction of the inequality.  I formulate the trick this way simply
+  // because I prefer that the common case involve comparisons of small positive
+  // integers.
+
+  static inline bool continue_probe(index_t tgt, index_t hash, index_t idx) {
     // NOTE: index_t must be unsigned for this check to work.
-    index_t norm_tgt = tgt - init_hash;
-    index_t norm_hash = hash - init_hash;
-    return norm_tgt >= norm_hash;
-  }
-
-  static inline bool stop_insert_scan(index_t tgt, index_t hash,
-                                      index_t init_hash) {
-    // NOTE: index_t must be unsigned for this check to work.
-    index_t norm_tgt = tgt - init_hash;
-    index_t norm_hash = hash - init_hash;
-    return norm_tgt <= norm_hash;
+    return (idx - tgt) <= (idx - hash);
   }
 
   // Constant used to determine the target maximum load factor.  The table will
@@ -131,6 +220,8 @@ class hyper_table {
     buckets = bucket_array_create(new_capacity);
     capacity = new_capacity;
     occupancy = 0;
+    // Set the count of insertions and removals so that insertions into the
+    // new table do not trigger another rebuild.
     ins_rm_count = -old_occupancy;
 
     for (int32_t i = 0; i < old_capacity; ++i) {
@@ -164,69 +255,39 @@ class hyper_table {
     // Target hash
     index_t tgt = get_table_entry(capacity, key);
     bucket *buckets = this->buckets;
-    // Start the search at the target hash
+    // Start the probe at the target hash
     index_t i = tgt;
-    index_t init_hash = (index_t)(-1);
     do {
       uintptr_t curr_key = buckets[i].key;
-      // If we find the key, return that bucket.
+      // Found the key?  Return that bucket.
       // TODO: Consider moving this bucket to the front of the run.
       if (key == curr_key)
         return &buckets[i];
 
-      // If we find an empty entry, the search failed.
+      // Found an empty entry?  The probe failed.
       if (is_empty(curr_key))
         return nullptr;
 
-      // If we find a tombstone, continue the search.
+      // Found a tombstone?  Continue the probe.
       if (is_tombstone(curr_key)) {
         i = inc_index(i, capacity);
         continue;
       }
 
-      // Otherwise we have another valid key that does not match.
-      // Record this hash for future search steps.
-      init_hash = get_table_entry(capacity, curr_key);
-      if ((tgt > i && i >= init_hash) ||
-          (tgt < init_hash && ((tgt > i) == (init_hash > i)))) {
-        // The search will stop at init_hash anyway, so return early.
-        return nullptr;
-      }
-      break;
-    } while (i != tgt);
-
-    do {
-      uintptr_t curr_key = buckets[i].key;
-      // If we find the key, return that bucket.
-      // TODO: Consider moving this bucket to the front of the run.
-      if (key == curr_key)
-        return &buckets[i];
-
-      // If we find an empty entry, the search failed.
-      if (is_empty(curr_key))
-        return nullptr;
+      // Otherwise, buckets[i] is another valid key that does not match.
+      index_t curr_hash = buckets[i].hash;
 
-      // If we find a tombstone, continue the search.
-      if (is_tombstone(curr_key)) {
+      if (continue_probe(tgt, curr_hash, i)) {
         i = inc_index(i, capacity);
         continue;
       }
 
-      // Otherwise we have another valid key that does not match.
-      // Compare the hashes to decide whether or not to continue the
-      // search.
-      index_t curr_hash = get_table_entry(capacity, curr_key);
-      if (continue_search(tgt, curr_hash, init_hash)) {
-        i = inc_index(i, capacity);
-        continue;
-      }
-
-      // If none of the above cases match, then the search failed to
+      // If none of the above cases match, then the probe failed to
       // find the key.
       return nullptr;
     } while (i != tgt);
 
-    // The search failed to find the key.
+    // The probe failed to find the key.
     return nullptr;
   }
 
@@ -267,7 +328,7 @@ class hyper_table {
           }
         }
 
-        // The key is not aleady in the table.  Append the bucket.
+        // The key is not already in the table.  Append the bucket.
         buckets[occupancy] = b;
         ++this->occupancy;
         return true;
@@ -288,7 +349,8 @@ class hyper_table {
     }
 
     // Target hash
-    index_t tgt = get_table_entry(capacity, b.key);
+    const index_t tgt = get_table_entry(capacity, b.key);
+    b.hash = tgt;
 
     // If we find an empty entry, insert the bucket there.
     if (is_empty(buckets[tgt].key)) {
@@ -298,113 +360,20 @@ class hyper_table {
       return true;
     }
 
-    // Search for the place to insert b.
+    // Probe for the place to insert b.
     index_t i = tgt;
 
-    // Searching for an appropriate insertion point requires handling four
-    // conditions based on tgt --- the target index of the item being inserted
-    // --- i --- the current index in the hash table being examined in the
-    // search --- and hash --- the target index of the item at index i.
-    //
-    // Generally speaking, items that hash to the same index appear next to each
-    // other in the table, and items that hash to adjacent indices (modulo the
-    // table's capacity) appear next to each other in sorted order based on the
-    // indices they hash to.  These invariants hold with the exception that
-    // tombstones can exist between items in the table that would otherwise be
-    // adjacent.  Let a _run_ be a sequence of hash values for consecutive valid
-    // entries in the table (modulo the table's capacity).
-    //
-    // The search must accommodate the following 4 conditions:
-    // - Non-wrapped search (NS): tgt <= i
-    // - Wrapped search (WS):     tgt > i
-    // - Non-wrapped run (NR):    hash <= i
-    // - Wrapped run (WR):        hash > i
-    //
-    // These conditions lead to 4 cases:
-    // - NS+NR: hash <= tgt <= i:
-    //   Common case.  Search terminates when hash > tgt.
-    // - WS+WR: i < hash <= tgt:
-    //   Like NS+NR, search terminates when hash > tgt.
-    // - NS+WR: tgt <= i < hash:
-    //   The search needs to treat tgt as larger than hash.  Given init --- the
-    //   hash of the first non-tombstone encountered --- comparing shifted
-    //   values of tgt and hash --- specifically, X-init+2^k mod 2^k, where X
-    //   \in {tgt, hash} --- causes tgt to become large and allows the search to
-    //   terminate when shifted hash > shifted tgt.
-    // - WS+NR: hash <= i < tgt:
-    //   The search needs to stop search before wrapping and treat hash as
-    //   larger than tgt.
-    //   Given init, computing on shifted values of tgt and hash --- i.e.,
-    //   X-init+2^k mod 2^k where X \in {tgt, hash} --- causes hash to become
-    //   large and allows the search to terminate when shifted hash > shifted
-    //   tgt.
-
-    // Probe to find either a place to insert b or another valid entry in the
-    // hash table, whose hash is then stored in init_hash.
-    index_t init_hash = (index_t)(-1);
-    do {
-      uintptr_t curr_key = buckets[i].key;
-      // If we find the key, overwrite that bucket.
-      // TODO: Reconsider what we do in this case.
-      if (b.key == curr_key) {
-        buckets[i].value = b.value;
-        return true;
-      }
-
-      // If we find an empty entry, insert b there.
-      if (is_empty(curr_key)) {
-        buckets[i] = b;
-        ++this->occupancy;
-        ++this->ins_rm_count;
-        return true;
-      }
-
-      if (is_tombstone(curr_key)) {
-        // Check whether the next entry is valid.
-        index_t next_i = inc_index(i, capacity);
-        uintptr_t next_key = buckets[next_i].key;
-        if (is_valid(next_key)) {
-          // Record the hash of the first valid entry found and exit the loop.
-          init_hash = get_table_entry(capacity, next_key);
-          // Check if the search can be terminated early, either because
-          // init_hash == tgt, or we're in the WS+NR case, or we're terminating
-          // the search in the NS+NR or WS+WR cases.
-          if ((tgt == init_hash) || (tgt > next_i && next_i >= init_hash) ||
-              (tgt < init_hash && ((tgt > next_i) == (init_hash > next_i)))) {
-            // The hash at the end of this run of tombstones would terminate the
-            // search.  Because there are only tombstones between tgt and
-            // next_i, inserting b at tgt is safe.
-            buckets[tgt] = b;
-            ++this->occupancy;
-            ++this->ins_rm_count;
-            return true;
-          }
-          break;
-        }
-        // We found a tombstone followed by an invalid entry (tombstone or
-        // empty).  Continue searching.
-        i = next_i;
-        continue;
-      }
-
-      // Record the hash of the first valid entry found and exit the loop.
-      init_hash = get_table_entry(capacity, curr_key);
-      break;
-
-    } while (i != tgt);
-    assert(init_hash != (index_t)(-1));
-
-    // Use init_hash to continue probing to find a place to insert b.
+    const index_t probe_end = tgt;
     do {
       uintptr_t curr_key = buckets[i].key;
-      // If we find the key, overwrite that bucket.
-      // TODO: Reconsider what we do in this case.
+      // Found the key?  Overwrite that bucket.
+      // TODO: Reconsider what to do in this case.
       if (b.key == curr_key) {
         buckets[i].value = b.value;
         return true;
       }
 
-      // If we find an empty entry, insert b there.
+      // Found an empty entry?  Insert b there.
       if (is_empty(curr_key)) {
         buckets[i] = b;
         ++this->occupancy;
@@ -412,52 +381,58 @@ class hyper_table {
         return true;
       }
 
-      // If we find a tombstone, check whether to insert b here, and finish the
-      // insert if so.
+      // Found a tombstone?
       if (is_tombstone(curr_key)) {
         index_t current_tomb = i;
-        // Scan all consecutive tombstones from i.
+        // Scan consecutive tombstones from i.
         index_t next_i = inc_index(i, capacity);
         uintptr_t tomb_end = buckets[next_i].key;
-        while (is_tombstone(tomb_end)) {
+        while (next_i != probe_end && is_tombstone(tomb_end)) {
           next_i = inc_index(next_i, capacity);
           tomb_end = buckets[next_i].key;
         }
-        // If the next entry is empty, then the search would stop.  It's safe to
-        // insert the bucket at the tombstone.
+
+        // If the next entry is empty, then the probe would stop.  It's
+        // safe to insert the bucket at the tombstone at i.
         if (is_empty(tomb_end)) {
           buckets[current_tomb] = b;
           ++this->occupancy;
           ++this->ins_rm_count;
           return true;
         }
-        // Check if the hash of the element at the end of this run of tombstones
-        // would terminate the search.
-        index_t tomb_end_hash = get_table_entry(capacity, tomb_end);
-        if (stop_insert_scan(tgt, tomb_end_hash, init_hash)) {
-          // It's safe to insert the element at the current tombstone.
+
+        // Check if the hash at the end of this run of tombstones would
+        // terminate the probe or if the probe has traversed the whole
+        // table.
+        index_t tomb_end_hash = buckets[next_i].hash;
+        if (next_i == probe_end ||
+            !continue_probe(tgt, tomb_end_hash, next_i)) {
+          // It's safe to insert b at the current tombstone.
           buckets[current_tomb] = b;
           ++this->occupancy;
           ++this->ins_rm_count;
           return true;
         }
+
         // None of the locations among these consecutive tombstones are
-        // appropriate for this bucket.  Continue the search.
-        i = inc_index(next_i, capacity);
+        // appropriate for this bucket.  Continue the probe.
+        i = next_i;
         continue;
       }
 
-      // Otherwise we have another valid key that does not match.  Compare the
-      // hashes to decide whether or not to continue the search.
-      index_t curr_hash = get_table_entry(capacity, curr_key);
-      if (continue_search(tgt, curr_hash, init_hash)) {
+      // Otherwise this entry contains another valid key that does
+      // not match.  Compare the hashes to decide whether or not to
+      // continue the probe.
+      index_t curr_hash = buckets[i].hash;
+      if (continue_probe(tgt, curr_hash, i)) {
         i = inc_index(i, capacity);
         continue;
       }
 
-      // This is an appropriate location to insert the bucket.  Stop the search.
+      // This is an appropriate location to insert the bucket.  Stop
+      // the probe.
       break;
-    } while (i != tgt);
+    } while (i != probe_end);
 
     index_t insert_tgt = i;
     // The search found a place to insert the bucket, but it's occupied.  Insert
diff --git a/cilksan/reducers.cpp b/cilksan/reducers.cpp
index 4cb1d11..3516940 100644
--- a/cilksan/reducers.cpp
+++ b/cilksan/reducers.cpp
@@ -15,7 +15,7 @@ static void reducer_register(const csi_id_t call_id, unsigned MAAP_count,
 
   if (CilkSanImpl.stealable()) {
     hyper_table *reducer_views = CilkSanImpl.get_or_create_reducer_views();
-    reducer_views->insert((bucket){
+    reducer_views->insert((hyper_table::bucket){
         .key = (uintptr_t)key,
         .value = {.view = key, .reduce_fn = (__cilk_reduce_fn)reduce_ptr}});
   }
@@ -139,9 +139,9 @@ void CilkSanImpl_t::reduce_local_views() {
 
   // Reduce every reducer view in the table with its leftmost view.
   int32_t capacity = reducer_views->capacity;
-  bucket *buckets = reducer_views->buckets;
+  hyper_table::bucket *buckets = reducer_views->buckets;
   for (int32_t i = 0; i < capacity; ++i) {
-    bucket b = buckets[i];
+    hyper_table::bucket b = buckets[i];
     if (!is_valid(b.key))
       continue;
 
@@ -199,16 +199,16 @@ hyper_table::merge_two_hyper_tables(CilkSanImpl_t *__restrict__ tool,
 
   int32_t src_capacity =
       (src->capacity < MIN_HT_CAPACITY) ? src->occupancy : src->capacity;
-  bucket *src_buckets = src->buckets;
+  hyper_table::bucket *src_buckets = src->buckets;
   // Iterate over the contents of the source hyper_table.
   for (int32_t i = 0; i < src_capacity; ++i) {
-    struct bucket b = src_buckets[i];
+    hyper_table::bucket b = src_buckets[i];
     if (!is_valid(b.key))
       continue;
 
     // For each valid key in the source table, lookup that key in the
     // destination table.
-    bucket *dst_bucket = dst->find(b.key);
+    hyper_table::bucket *dst_bucket = dst->find(b.key);
 
     if (nullptr == dst_bucket) {
       // The destination table does not contain this key.  Insert the