diff --git a/src/snmalloc/backend_helpers/statsrange.h b/src/snmalloc/backend_helpers/statsrange.h
index 8548be9cb..7e92a801a 100644
--- a/src/snmalloc/backend_helpers/statsrange.h
+++ b/src/snmalloc/backend_helpers/statsrange.h
@@ -17,8 +17,7 @@ namespace snmalloc
     {
       using ContainsParent<ParentRange>::parent;
 
-      static inline std::atomic<size_t> current_usage{};
-      static inline std::atomic<size_t> peak_usage{};
+      static inline Stat usage{};
 
     public:
       static constexpr bool Aligned = ParentRange::Aligned;
@@ -31,34 +30,26 @@ namespace snmalloc
 
       CapPtr<void, ChunkBounds> alloc_range(size_t size)
       {
-        auto result = parent.alloc_range(size);
-        if (result != nullptr)
-        {
-          auto prev = current_usage.fetch_add(size);
-          auto curr = peak_usage.load();
-          while (curr < prev + size)
-          {
-            if (peak_usage.compare_exchange_weak(curr, prev + size))
-              break;
-          }
-        }
-        return result;
+        auto r = parent.alloc_range(size);
+        if (r != nullptr)
+          usage += size;
+        return r;
       }
 
       void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
       {
-        current_usage -= size;
+        usage -= size;
         parent.dealloc_range(base, size);
       }
 
       size_t get_current_usage()
       {
-        return current_usage.load();
+        return usage.get_curr();
       }
 
       size_t get_peak_usage()
       {
-        return peak_usage.load();
+        return usage.get_peak();
       }
     };
   };
diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h
index 2083190bc..11115df73 100644
--- a/src/snmalloc/ds_core/ds_core.h
+++ b/src/snmalloc/ds_core/ds_core.h
@@ -15,3 +15,4 @@
 #include "ptrwrap.h"
 #include "redblacktree.h"
 #include "seqset.h"
+#include "stats.h"
diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h
new file mode 100644
index 000000000..6fa203769
--- /dev/null
+++ b/src/snmalloc/ds_core/stats.h
@@ -0,0 +1,121 @@
+#pragma once
+
+#include "defines.h"
+
+#include <atomic>
+#include <cstddef>
+
+namespace snmalloc
+{
+  /**
+   * Very basic statistic that tracks current and peak values.
+   *
+   * Both values live in atomics, so updates from several threads do not race.
+   * The peak is raised after the current value has been updated, so a reader
+   * may briefly observe a current value above the recorded peak.
+   */
+  class Stat
+  {
+  private:
+    std::atomic<size_t> curr{0};
+    std::atomic<size_t> peak{0};
+
+  public:
+    /**
+     * Add `amount` to the current value and raise the peak if the new current
+     * value exceeds it.
+     */
+    void increase(size_t amount)
+    {
+      size_t c = (curr += amount);
+      size_t p = peak.load(std::memory_order_relaxed);
+      // A failed compare_exchange_strong reloads `p`, so the loop ends once
+      // the recorded peak is at least `c`, whoever published it.
+      while (c > p)
+      {
+        if (peak.compare_exchange_strong(p, c))
+          break;
+      }
+    }
+
+    /**
+     * Subtract `amount` from the current value.  Asserts that the value held
+     * before the subtraction was at least `amount`.
+     */
+    void decrease(size_t amount)
+    {
+      size_t prev = curr.fetch_sub(amount);
+      SNMALLOC_ASSERT_MSG(
+        prev >= amount, "prev = {}, amount = {}", prev, amount);
+      UNUSED(prev);
+    }
+
+    /** Current value, read with relaxed ordering. */
+    size_t get_curr() const
+    {
+      return curr.load(std::memory_order_relaxed);
+    }
+
+    /** Highest value recorded by increase() so far (relaxed read). */
+    size_t get_peak() const
+    {
+      return peak.load(std::memory_order_relaxed);
+    }
+
+    /** Shorthand for increase(amount). */
+    void operator+=(size_t amount)
+    {
+      increase(amount);
+    }
+
+    /** Shorthand for decrease(amount). */
+    void operator-=(size_t amount)
+    {
+      decrease(amount);
+    }
+
+    /** Shorthand for increase(1). */
+    void operator++()
+    {
+      increase(1);
+    }
+
+    /** Shorthand for decrease(1). */
+    void operator--()
+    {
+      decrease(1);
+    }
+  };
+
+  /**
+   * Very basic statistic that can only grow.
+   *
+   * The value is held in an atomic and every access uses relaxed ordering, so
+   * reading it from another thread is not a data race, but no ordering with
+   * other memory operations is implied.
+   */
+  class MonotoneLocalStat
+  {
+    std::atomic<size_t> value{0};
+
+  public:
+    /** Count one more event. */
+    void operator++(int)
+    {
+      value.fetch_add(1, std::memory_order_relaxed);
+    }
+
+    /** Add the value currently held by `other` to this statistic. */
+    void operator+=(const MonotoneLocalStat& other)
+    {
+      auto v = other.value.load(std::memory_order_relaxed);
+      value.fetch_add(v, std::memory_order_relaxed);
+    }
+
+    /** Current value. */
+    size_t operator*() const
+    {
+      return value.load(std::memory_order_relaxed);
+    }
+  };
+} // namespace snmalloc
diff --git a/src/snmalloc/mem/allocstats.h b/src/snmalloc/mem/allocstats.h
new file mode 100644
index 000000000..bfa789c36
--- /dev/null
+++ b/src/snmalloc/mem/allocstats.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include "../ds_core/ds_core.h"
+#include "sizeclasstable.h"
+
+#include <array>
+#include <cstddef>
+
+namespace snmalloc
+{
+  /**
+   * Event counters kept for one size class.  Allocation and deallocation are
+   * counted separately for objects and for slabs.
+   */
+  struct AllocStat
+  {
+    MonotoneLocalStat objects_allocated{};
+    MonotoneLocalStat objects_deallocated{};
+    MonotoneLocalStat slabs_allocated{};
+    MonotoneLocalStat slabs_deallocated{};
+  };
+
+  /**
+   * One AllocStat per size class, stored at the raw sizeclass_t value.
+   */
+  class AllocStats
+  {
+    std::array<AllocStat, SIZECLASS_REP_SIZE> sizeclass{};
+
+  public:
+    /** Counters for the size class `index`. */
+    AllocStat& operator[](sizeclass_t index)
+    {
+      return sizeclass[index.raw()];
+    }
+
+    /** Counters for the small size class `index`. */
+    AllocStat& operator[](smallsizeclass_t index)
+    {
+      return sizeclass[sizeclass_t::from_small_class(index).raw()];
+    }
+
+    /** Add every counter of `other` to the matching counter of this object. */
+    void operator+=(const AllocStats& other)
+    {
+      for (size_t i = 0; i < SIZECLASS_REP_SIZE; i++)
+      {
+        auto& mine = sizeclass[i];
+        const auto& theirs = other.sizeclass[i];
+        mine.objects_allocated += theirs.objects_allocated;
+        mine.objects_deallocated += theirs.objects_deallocated;
+        mine.slabs_allocated += theirs.slabs_allocated;
+        mine.slabs_deallocated += theirs.slabs_deallocated;
+      }
+    }
+  };
+} // namespace snmalloc
diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h
index c7fc79b72..1bb126a51 100644
--- a/src/snmalloc/mem/corealloc.h
+++ b/src/snmalloc/mem/corealloc.h
@@ -111,6 +111,11 @@ namespace snmalloc
      */
     Ticker<typename Config::Pal> ticker;
 
+    /**
+     * Tracks this allocator's memory usage.
+     */
+    AllocStats stats;
+
     /**
      * The message queue needs to be accessible from other threads
      *
@@ -364,6 +369,8 @@ namespace snmalloc
         // don't touch the cache lines at this point in snmalloc_check_client.
         auto start = clear_slab(meta, sizeclass);
 
+        stats[sizeclass].slabs_deallocated++;
+
         Config::Backend::dealloc_chunk(
           get_backend_local_state(),
           *meta,
@@ -400,6 +407,8 @@ namespace snmalloc
         // Remove from set of fully used slabs.
         meta->node.remove();
 
+        stats[entry.get_sizeclass()].slabs_deallocated++;
+
         Config::Backend::dealloc_chunk(
           get_backend_local_state(), *meta, p, size);
 
@@ -478,14 +487,18 @@ namespace snmalloc
                            SNMALLOC_FAST_PATH_LAMBDA {
                              return capptr_domesticate<Config>(local_state, p);
                            };
-      auto cb = [this,
-                 &need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA {
+
+      size_t received_bytes = 0;
+
+      auto cb = [this, &need_post, &received_bytes](
+                  freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA {
 #ifdef SNMALLOC_TRACING
         message<1024>("Handling remote");
 #endif
 
         auto& entry =
           Config::Backend::template get_metaentry(snmalloc::address_cast(msg));
+        received_bytes += sizeclass_full_to_size(entry.get_sizeclass());
 
         handle_dealloc_remote(entry, msg.as_void(), need_post);
 
@@ -514,6 +527,9 @@ namespace snmalloc
         post();
       }
 
+      // Push size to global statistics
+      RemoteDeallocCache::remote_inflight -= received_bytes;
+
       return action(args...);
     }
 
@@ -542,10 +558,7 @@ namespace snmalloc
       }
       else
       {
-        if (
-          !need_post &&
-          !attached_cache->remote_dealloc_cache.reserve_space(entry))
-          need_post = true;
+        need_post |= !attached_cache->remote_dealloc_cache.reserve_space(entry);
         attached_cache->remote_dealloc_cache
           .template dealloc<sizeof(CoreAllocator)>(
             entry.get_remote()->trunc_id(), p.as_void());
@@ -668,13 +681,14 @@ namespace snmalloc
       // pointers
       auto& entry =
         Config::Backend::template get_metaentry(snmalloc::address_cast(p));
-      if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy)))
+      if (SNMALLOC_LIKELY(dealloc_local_object_fast<false>(entry, p, entropy)))
         return;
 
       dealloc_local_object_slow(p, entry);
     }
 
-    SNMALLOC_FAST_PATH static bool dealloc_local_object_fast(
+    template<bool Statistics = true>
+    SNMALLOC_FAST_PATH bool dealloc_local_object_fast(
       const PagemapEntry& entry,
       CapPtr<void, capptr::bounds::Alloc> p,
       LocalEntropy& entropy)
@@ -695,6 +709,10 @@ namespace snmalloc
       // Update the head and the next pointer in the free list.
       meta->free_queue.add(cp, key, entropy);
 
+      if constexpr (Statistics)
+      {
+        stats[entry.get_sizeclass()].objects_deallocated++;
+      }
       return SNMALLOC_LIKELY(!meta->return_object());
     }
 
@@ -741,6 +759,7 @@ namespace snmalloc
         }
 
         auto r = finish_alloc<zero_mem, Config>(p, sizeclass);
+        stats[sizeclass].objects_allocated++;
         return ticker.check_tick(r);
       }
       return small_alloc_slow<zero_mem>(sizeclass, fast_free_list);
@@ -813,6 +832,9 @@ namespace snmalloc
       }
 
       auto r = finish_alloc<zero_mem, Config>(p, sizeclass);
+
+      stats[sizeclass].objects_allocated++;
+      stats[sizeclass].slabs_allocated++;
       return ticker.check_tick(r);
     }
 
@@ -834,7 +856,7 @@ namespace snmalloc
       {
         auto p_wild = message_queue().destroy();
         auto p_tame = domesticate(p_wild);
-
+        size_t received_bytes = 0;
         while (p_tame != nullptr)
         {
           bool need_post = true; // Always going to post, so ignore.
@@ -842,9 +864,11 @@ namespace snmalloc
             p_tame->atomic_read_next(RemoteAllocator::key_global, domesticate);
           const PagemapEntry& entry =
             Config::Backend::get_metaentry(snmalloc::address_cast(p_tame));
+          received_bytes += sizeclass_full_to_size(entry.get_sizeclass());
           handle_dealloc_remote(entry, p_tame.as_void(), need_post);
           p_tame = n_tame;
         }
+        RemoteDeallocCache::remote_inflight -= received_bytes;
       }
       else
       {
@@ -986,6 +1010,11 @@ namespace snmalloc
 
       return debug_is_empty_impl(result);
     }
+
+    const AllocStats& get_stats()
+    {
+      return stats;
+    }
   };
 
   /**
diff --git a/src/snmalloc/mem/globalalloc.h b/src/snmalloc/mem/globalalloc.h
index dc9528f66..e618ed729 100644
--- a/src/snmalloc/mem/globalalloc.h
+++ b/src/snmalloc/mem/globalalloc.h
@@ -87,6 +87,9 @@ namespace snmalloc
       }
     }
 
+    if (result == nullptr)
+      SNMALLOC_CHECK(RemoteDeallocCache::remote_inflight.get_curr() == 0);
+
     if (result != nullptr)
     {
       *result = okay;
@@ -134,4 +137,93 @@ namespace snmalloc
     }
   }
 
+  /**
+   * Add the statistics of every allocator in AllocPool<Config> to `stats`.
+   *
+   * The counters are accumulated on top of whatever `stats` already holds.
+   */
+  template<SNMALLOC_CONCEPT(IsConfig) Config>
+  inline static void get_stats(AllocStats& stats)
+  {
+    auto alloc = AllocPool<Config>::iterate();
+    while (alloc != nullptr)
+    {
+      stats += alloc->get_stats();
+      alloc = AllocPool<Config>::iterate(alloc);
+    }
+  }
+
+  /**
+   * Emit a CSV-style dump of the allocation statistics through message().
+   *
+   * The two header rows are emitted only by the first call for a given
+   * Config.  Every call emits one "snmalloc_allocs" row for each size class
+   * that has recorded at least one object allocation or deallocation,
+   * followed by one "snmalloc_totals" row.  All rows of a call carry the same
+   * dump id, taken from a counter that is incremented on every call.
+   */
+  template<SNMALLOC_CONCEPT(IsConfig) Config>
+  inline static void print_alloc_stats()
+  {
+    static std::atomic<size_t> dump{0};
+
+    auto l_dump = dump++;
+    if (l_dump == 0)
+    {
+      message<1024>(
+        "snmalloc_allocs,dumpid,sizeclass,size,allocated,deallocated,in_use,"
+        "bytes,slabs allocated,slabs deallocated,slabs in_use,slabs bytes");
+      message<1024>(
+        "snmalloc_totals,dumpid,backend bytes,peak backend "
+        "bytes,requested,slabs requested bytes,remote inflight bytes,allocator "
+        "count");
+    }
+
+    AllocStats stats;
+    snmalloc::get_stats<Config>(stats);
+    size_t total_live{0};
+    size_t total_live_slabs{0};
+    for (size_t i = 0; i < snmalloc::SIZECLASS_REP_SIZE; i++)
+    {
+      auto sc = snmalloc::sizeclass_t::from_raw(i);
+      auto allocated = *stats[sc].objects_allocated;
+      auto deallocated = *stats[sc].objects_deallocated;
+      auto slabs_allocated = *stats[sc].slabs_allocated;
+      auto slabs_deallocated = *stats[sc].slabs_deallocated;
+      // Size classes with no recorded object activity are left out.
+      if (allocated == 0 && deallocated == 0)
+        continue;
+      auto size = snmalloc::sizeclass_full_to_size(sc);
+      auto slab_size = snmalloc::sizeclass_full_to_slab_size(sc);
+      auto in_use = allocated - deallocated;
+      auto amount = in_use * size;
+      total_live += amount;
+      auto in_use_slabs = slabs_allocated - slabs_deallocated;
+      auto amount_slabs = in_use_slabs * slab_size;
+      total_live_slabs += amount_slabs;
+
+      snmalloc::message<1024>(
+        "snmalloc_allocs,{},{},{},{},{},{},{},{},{},{},{}",
+        l_dump,
+        i,
+        size,
+        allocated,
+        deallocated,
+        in_use,
+        amount,
+        slabs_allocated,
+        slabs_deallocated,
+        in_use_slabs,
+        amount_slabs);
+    }
+    snmalloc::message<1024>(
+      "snmalloc_totals,{},{},{},{},{},{},{}",
+      l_dump,
+      Config::Backend::get_current_usage(),
+      Config::Backend::get_peak_usage(),
+      total_live,
+      total_live_slabs,
+      RemoteDeallocCache::remote_inflight.get_curr(),
+      Config::pool().get_count());
+  }
 } // namespace snmalloc
diff --git a/src/snmalloc/mem/localalloc.h b/src/snmalloc/mem/localalloc.h
index c85d30b2b..f8110e049 100644
--- a/src/snmalloc/mem/localalloc.h
+++ b/src/snmalloc/mem/localalloc.h
@@ -211,6 +211,12 @@ namespace snmalloc
             chunk.unsafe_ptr(), bits::next_pow2(size));
         }
 
+        if (chunk.unsafe_ptr() != nullptr)
+        {
+          auto sc = size_to_sizeclass_full(size);
+          core_alloc->stats[sc].objects_allocated++;
+          core_alloc->stats[sc].slabs_allocated++;
+        }
         return capptr_chunk_is_alloc(capptr_to_user_address_control(chunk));
       });
     }
@@ -246,7 +252,7 @@ namespace snmalloc
       };
 
       return local_cache.template alloc<zero_mem, Config>(
-        domesticate, size, slowpath);
+        domesticate, core_alloc->stats, size, slowpath);
     }
 
     /**
@@ -418,7 +424,7 @@ namespace snmalloc
         message<1024>("flush(): core_alloc={}", core_alloc);
 #endif
         local_cache.remote_allocator = &Config::unused_remote;
-        local_cache.remote_dealloc_cache.capacity = 0;
+        local_cache.remote_dealloc_cache.cache_bytes = REMOTE_CACHE;
       }
     }
 
@@ -648,7 +654,7 @@ namespace snmalloc
       {
         dealloc_cheri_checks(p_tame.unsafe_ptr());
 
-        if (SNMALLOC_LIKELY(CoreAlloc::dealloc_local_object_fast(
+        if (SNMALLOC_LIKELY(core_alloc->dealloc_local_object_fast(
               entry, p_tame, local_cache.entropy)))
           return;
         core_alloc->dealloc_local_object_slow(p_tame, entry);
diff --git a/src/snmalloc/mem/localcache.h b/src/snmalloc/mem/localcache.h
index cfbbaa576..0bac2541f 100644
--- a/src/snmalloc/mem/localcache.h
+++ b/src/snmalloc/mem/localcache.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "../ds/ds.h"
+#include "allocstats.h"
 #include "freelist.h"
 #include "remotecache.h"
 #include "sizeclasstable.h"
@@ -94,8 +95,11 @@ namespace snmalloc
       typename Config,
       typename Slowpath,
       typename Domesticator>
-    SNMALLOC_FAST_PATH capptr::Alloc<void>
-    alloc(Domesticator domesticate, size_t size, Slowpath slowpath)
+    SNMALLOC_FAST_PATH capptr::Alloc<void> alloc(
+      Domesticator domesticate,
+      AllocStats& stats,
+      size_t size,
+      Slowpath slowpath)
     {
       auto& key = entropy.get_free_list_key();
       smallsizeclass_t sizeclass = size_to_sizeclass(size);
@@ -103,6 +107,7 @@ namespace snmalloc
       if (SNMALLOC_LIKELY(!fl.empty()))
       {
         auto p = fl.take(key, domesticate);
+        stats[sizeclass].objects_allocated++;
         return finish_alloc<zero_mem, Config>(p, sizeclass);
       }
       return slowpath(sizeclass, &fl);
diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h
index 36737207d..9dc686e43 100644
--- a/src/snmalloc/mem/pool.h
+++ b/src/snmalloc/mem/pool.h
@@ -34,9 +34,15 @@ namespace snmalloc
 
     FlagWord lock{};
     capptr::Alloc<T> list{nullptr};
+    std::atomic<size_t> count{0};
 
   public:
     constexpr PoolState() = default;
+
+    size_t get_count()
+    {
+      return count.load(std::memory_order_relaxed);
+    }
   };
 
   /**
@@ -156,6 +162,8 @@ namespace snmalloc
       p->list_next = pool.list;
       pool.list = p;
 
+      pool.count++;
+
       p->set_in_use();
       return p.unsafe_ptr();
     }
diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h
index 96f5e0973..a53737f24 100644
--- a/src/snmalloc/mem/remotecache.h
+++ b/src/snmalloc/mem/remotecache.h
@@ -19,14 +19,17 @@ namespace snmalloc
   {
     std::array<freelist::Builder<false>, REMOTE_SLOTS> list;
 
+    static inline Stat remote_inflight;
+
     /**
-     * The total amount of memory we are waiting for before we will dispatch
-     * to other allocators. Zero can mean we have not initialised the allocator
-     * yet. This is initialised to the 0 so that we always hit a slow path to
-     * start with, when we hit the slow path and need to dispatch everything, we
-     * can check if we are a real allocator and lazily provide a real allocator.
+     * The total number of bytes of memory currently held in the cache.
+     *
+     * REMOTE_CACHE is used as the initial value so that we always hit the
+     * slow path to start with.  When we hit the slow path and need to
+     * dispatch everything, we can check whether we are a real allocator and
+     * lazily provide one if we are not.
      */
-    int64_t capacity{0};
+    size_t cache_bytes{REMOTE_CACHE};
 
 #ifndef NDEBUG
     bool initialised = false;
@@ -56,13 +59,10 @@ namespace snmalloc
     template<typename Entry>
     SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry)
     {
-      auto size =
-        static_cast<int64_t>(sizeclass_full_to_size(entry.get_sizeclass()));
+      auto size = sizeclass_full_to_size(entry.get_sizeclass());
 
-      bool result = capacity > size;
-      if (result)
-        capacity -= size;
-      return result;
+      cache_bytes += size;
+      return cache_bytes < REMOTE_CACHE;
     }
 
     template<size_t allocator_size>
@@ -91,6 +91,8 @@ namespace snmalloc
                              return capptr_domesticate<Config>(local_state, p);
                            };
 
+      // We are about to post cache_bytes bytes to other allocators.
+      remote_inflight += cache_bytes;
       while (true)
       {
         auto my_slot = get_slot<allocator_size>(id, post_round);
@@ -152,7 +154,7 @@ namespace snmalloc
       }
 
-      // Reset capacity as we have empty everything
-      capacity = REMOTE_CACHE;
+      // Reset the cached byte count as we have emptied everything
+      cache_bytes = 0;
 
       return sent_something;
     }
@@ -177,7 +179,7 @@ namespace snmalloc
         // a null address.
         l.init(0, RemoteAllocator::key_global);
       }
-      capacity = REMOTE_CACHE;
+      cache_bytes = 0;
     }
   };
 } // namespace snmalloc
diff --git a/src/test/func/cleanup/cleanup.cc b/src/test/func/cleanup/cleanup.cc
new file mode 100644
index 000000000..f733e5ed0
--- /dev/null
+++ b/src/test/func/cleanup/cleanup.cc
@@ -0,0 +1,77 @@
+#include <chrono>
+#include <cstring>
+#include <iostream>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+/**
+ * Simulates one short-lived call: create a scoped allocator, allocate 1000
+ * small objects and one 1MiB object from it, touch the large one, and free
+ * everything before the allocator goes out of scope.
+ */
+void ecall()
+{
+  constexpr size_t large_size = 1 * 1024 * 1024;
+
+  snmalloc::ScopedAllocator a;
+  std::vector<void*> allocs;
+  for (size_t j = 0; j < 1000; j++)
+  {
+    allocs.push_back(a.alloc.alloc(j % 1024));
+  }
+  auto p = a.alloc.alloc(large_size);
+  std::memset(p, 0, large_size);
+
+  for (auto* small : allocs)
+    a.alloc.dealloc(small);
+
+  a.alloc.dealloc(p);
+}
+
+/**
+ * Worker: performs 1000 ecall()s with a 10ms pause after each.
+ */
+void thread_body()
+{
+  for (int i = 0; i < 1000; i++)
+  {
+    ecall();
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  }
+}
+
+/**
+ * Monitor: once a second, for 60 seconds, prints the backend's current and
+ * peak usage and the pool's allocator count.
+ */
+void monitor_body()
+{
+  for (int i = 0; i < 60; i++)
+  {
+    std::cout << "Current: "
+              << snmalloc::Alloc::Config::Backend::get_current_usage()
+              << std::endl;
+    std::cout << "Peak   : "
+              << snmalloc::Alloc::Config::Backend::get_peak_usage()
+              << std::endl;
+    std::cout << "Allocs : " << snmalloc::Alloc::Config::pool().get_count()
+              << std::endl;
+    std::cout << "--------------------------------------------" << std::endl;
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+  }
+}
+
+int main()
+{
+  std::vector<std::thread> threads;
+  for (int i = 0; i < 8; i++)
+  {
+    threads.emplace_back(thread_body);
+  }
+  threads.emplace_back(monitor_body);
+
+  for (auto& t : threads)
+    t.join();
+  return 0;
+}
diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc
index c8db1cad7..f5790dcce 100644
--- a/src/test/func/statistics/stats.cc
+++ b/src/test/func/statistics/stats.cc
@@ -18,6 +18,7 @@ void debug_check_empty_1()
   auto r = a.alloc(size);
 
   snmalloc::debug_check_empty<snmalloc::StandardConfig>(&result);
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
   if (result != false)
   {
     std::cout << "debug_check_empty failed to detect leaked memory:" << size
@@ -25,8 +26,12 @@ void debug_check_empty_1()
     abort();
   }
 
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
+
   a.dealloc(r);
 
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
+
   snmalloc::debug_check_empty<snmalloc::StandardConfig>(&result);
   if (result != true)
   {
@@ -34,7 +39,11 @@ void debug_check_empty_1()
     abort();
   }
 
-  r = a.alloc(size);
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
+
+  r = a.alloc(16);
+
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
 
   snmalloc::debug_check_empty<snmalloc::StandardConfig>(&result);
   if (result != false)
@@ -44,14 +53,20 @@ void debug_check_empty_1()
     abort();
   }
 
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
+
   a.dealloc(r);
 
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
+
   snmalloc::debug_check_empty<snmalloc::StandardConfig>(&result);
   if (result != true)
   {
     std::cout << "debug_check_empty failed to say empty:" << size << std::endl;
     abort();
   }
+
+  snmalloc::print_alloc_stats<snmalloc::StandardConfig>();
 }
 
 template<size_t size>
diff --git a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc
new file mode 100644
index 000000000..435cf4575
--- /dev/null
+++ b/src/test/perf/churn/churn.cc
@@ -0,0 +1,100 @@
+#include <atomic>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <queue>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+/**
+ * Churn benchmark: 16 threads repeatedly create a ScopedAllocator, allocate
+ * and free objects through it, and carry live objects over to the next
+ * iteration in a FIFO queue.  For 60 seconds a monitor prints one CSV line per
+ * second and calls print_alloc_stats(); the workers are then told to stop.
+ */
+int main()
+{
+  std::vector<std::thread> threads;
+  // Explicitly zeroed: before C++20 a default-constructed atomic is
+  // uninitialised.
+  std::atomic<size_t> running{0};
+  snmalloc::Stat requests;
+  std::atomic<bool> done{false};
+
+  for (size_t i = 0; i < 16; i++)
+  {
+    threads.emplace_back([&running, &requests, &done]() {
+      std::queue<size_t*> q;
+      while (!done)
+      {
+        snmalloc::ScopedAllocator alloc;
+        running++;
+
+        // Frees the oldest queued object.  Each object stores its own
+        // requested size in its first word.
+        auto release_oldest = [&q, &requests, &alloc]() {
+          auto p = q.front();
+          requests -= *p;
+          alloc->dealloc(p);
+          q.pop();
+        };
+
+        if (rand() % 1000 == 0)
+        {
+          // Deallocate everything in the queue
+          while (!q.empty())
+            release_oldest();
+        }
+
+        for (size_t j = 0; j < 1000; j++)
+        {
+          if (q.size() >= 20000 || (!q.empty() && (rand() % 10 == 0)))
+          {
+            release_oldest();
+          }
+          else
+          {
+            size_t size =
+              (rand() % 1024 == 0) ? 16 * 1024 * (1 << (rand() % 3)) : 48;
+            requests += size;
+            auto p = static_cast<size_t*>(alloc->alloc(size));
+            *p = size;
+            q.push(p);
+          }
+        }
+
+        running--;
+        std::this_thread::sleep_for(std::chrono::microseconds(rand() % 2000));
+      }
+    });
+  }
+
+  std::thread([&requests]() {
+    size_t count = 0;
+    while (count < 60)
+    {
+      count++;
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+      // CSV columns: index, peak reservation, current reservation, current
+      // requested bytes, peak requested bytes, peak remote inflight bytes,
+      // current remote inflight bytes.
+      std::cout << count << ","
+                << snmalloc::Alloc::Config::Backend::get_peak_usage() << ","
+                << snmalloc::Alloc::Config::Backend::get_current_usage() << ","
+                << requests.get_curr() << "," << requests.get_peak() << ","
+                << snmalloc::RemoteDeallocCache::remote_inflight.get_peak()
+                << ","
+                << snmalloc::RemoteDeallocCache::remote_inflight.get_curr()
+                << std::endl;
+      snmalloc::print_alloc_stats<snmalloc::Alloc::Config>();
+    }
+  }).join();
+
+  done = true;
+
+  for (auto& t : threads)
+    t.join();
+
+  return 0;
+}