diff --git a/src/snmalloc/backend_helpers/statsrange.h b/src/snmalloc/backend_helpers/statsrange.h index 8548be9cb..7e92a801a 100644 --- a/src/snmalloc/backend_helpers/statsrange.h +++ b/src/snmalloc/backend_helpers/statsrange.h @@ -17,8 +17,7 @@ namespace snmalloc { using ContainsParent::parent; - static inline std::atomic current_usage{}; - static inline std::atomic peak_usage{}; + static inline Stat usage{}; public: static constexpr bool Aligned = ParentRange::Aligned; @@ -31,34 +30,26 @@ namespace snmalloc CapPtr alloc_range(size_t size) { - auto result = parent.alloc_range(size); - if (result != nullptr) - { - auto prev = current_usage.fetch_add(size); - auto curr = peak_usage.load(); - while (curr < prev + size) - { - if (peak_usage.compare_exchange_weak(curr, prev + size)) - break; - } - } - return result; + auto r = parent.alloc_range(size); + if (r != nullptr) + usage += size; + return r; } void dealloc_range(CapPtr base, size_t size) { - current_usage -= size; + usage -= size; parent.dealloc_range(base, size); } size_t get_current_usage() { - return current_usage.load(); + return usage.get_curr(); } size_t get_peak_usage() { - return peak_usage.load(); + return usage.get_peak(); } }; }; diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h index 2083190bc..11115df73 100644 --- a/src/snmalloc/ds_core/ds_core.h +++ b/src/snmalloc/ds_core/ds_core.h @@ -15,3 +15,4 @@ #include "ptrwrap.h" #include "redblacktree.h" #include "seqset.h" +#include "stats.h" diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h new file mode 100644 index 000000000..6fa203769 --- /dev/null +++ b/src/snmalloc/ds_core/stats.h @@ -0,0 +1,94 @@ +#pragma once + +#include "defines.h" + +#include +#include + +namespace snmalloc +{ + /** + * Very basic statistic that tracks current and peak values. 
+ */ + class Stat + { + private: + std::atomic curr{0}; + std::atomic peak{0}; + + public: + void increase(size_t amount) + { + size_t c = (curr += amount); + size_t p = peak.load(std::memory_order_relaxed); + while (c > p) + { + if (peak.compare_exchange_strong(p, c)) + break; + } + } + + void decrease(size_t amount) + { + size_t prev = curr.fetch_sub(amount); + SNMALLOC_ASSERT_MSG( + prev >= amount, "prev = {}, amount = {}", prev, amount); + UNUSED(prev); + } + + size_t get_curr() const + { + return curr.load(std::memory_order_relaxed); + } + + size_t get_peak() const + { + return peak.load(std::memory_order_relaxed); + } + + void operator+=(size_t amount) + { + increase(amount); + } + + void operator-=(size_t amount) + { + decrease(amount); + } + + void operator++() + { + increase(1); + } + + void operator--() + { + decrease(1); + } + }; + + /** + * Very basic statistic that can only grow. Not thread-safe. + */ + class MonotoneLocalStat + { + std::atomic value{0}; + + public: + void operator++(int) + { + value.fetch_add(1, std::memory_order_relaxed); + } + + void operator+=(const MonotoneLocalStat& other) + { + auto v = other.value.load(std::memory_order_relaxed); + value.fetch_add(v, std::memory_order_relaxed); + } + + size_t operator*() const + { + return value.load(std::memory_order_relaxed); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/mem/allocstats.h b/src/snmalloc/mem/allocstats.h new file mode 100644 index 000000000..bfa789c36 --- /dev/null +++ b/src/snmalloc/mem/allocstats.h @@ -0,0 +1,46 @@ +#pragma once + +#include "../ds_core/ds_core.h" +#include "sizeclasstable.h" + +#include + +namespace snmalloc +{ + struct AllocStat + { + MonotoneLocalStat objects_allocated{}; + MonotoneLocalStat objects_deallocated{}; + MonotoneLocalStat slabs_allocated{}; + MonotoneLocalStat slabs_deallocated{}; + }; + + class AllocStats + { + std::array sizeclass{}; + + public: + AllocStat& operator[](sizeclass_t index) + { + auto i = index.raw(); + return sizeclass[i]; + } + + AllocStat& 
operator[](smallsizeclass_t index) + { + return sizeclass[sizeclass_t::from_small_class(index).raw()]; + } + + void operator+=(const AllocStats& other) + { + for (size_t i = 0; i < SIZECLASS_REP_SIZE; i++) + { + sizeclass[i].objects_allocated += other.sizeclass[i].objects_allocated; + sizeclass[i].objects_deallocated += + other.sizeclass[i].objects_deallocated; + sizeclass[i].slabs_allocated += other.sizeclass[i].slabs_allocated; + sizeclass[i].slabs_deallocated += other.sizeclass[i].slabs_deallocated; + } + } + }; +} // namespace snmalloc \ No newline at end of file diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/corealloc.h index c7fc79b72..1bb126a51 100644 --- a/src/snmalloc/mem/corealloc.h +++ b/src/snmalloc/mem/corealloc.h @@ -111,6 +111,11 @@ namespace snmalloc */ Ticker ticker; + /** + * Tracks this allocator's per-sizeclass object and slab counts. + */ + AllocStats stats; + /** * The message queue needs to be accessible from other threads * @@ -364,6 +369,8 @@ namespace snmalloc // don't touch the cache lines at this point in snmalloc_check_client. auto start = clear_slab(meta, sizeclass); + stats[sizeclass].slabs_deallocated++; + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, @@ -400,6 +407,8 @@ namespace snmalloc // Remove from set of fully used slabs. 
meta->node.remove(); + stats[entry.get_sizeclass()].slabs_deallocated++; + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, p, size); @@ -478,14 +487,18 @@ namespace snmalloc SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); }; - auto cb = [this, - &need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA { + + size_t received_bytes = 0; + + auto cb = [this, &need_post, &received_bytes]( + freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA { #ifdef SNMALLOC_TRACING message<1024>("Handling remote"); #endif auto& entry = Config::Backend::template get_metaentry(snmalloc::address_cast(msg)); + received_bytes += sizeclass_full_to_size(entry.get_sizeclass()); handle_dealloc_remote(entry, msg.as_void(), need_post); @@ -514,6 +527,9 @@ namespace snmalloc post(); } + // Received bytes are no longer in flight; update the global statistic. + RemoteDeallocCache::remote_inflight -= received_bytes; + return action(args...); } @@ -542,10 +558,7 @@ namespace snmalloc } else { - if ( - !need_post && - !attached_cache->remote_dealloc_cache.reserve_space(entry)) - need_post = true; + need_post |= !attached_cache->remote_dealloc_cache.reserve_space(entry); attached_cache->remote_dealloc_cache .template dealloc( entry.get_remote()->trunc_id(), p.as_void()); @@ -668,13 +681,14 @@ namespace snmalloc // pointers auto& entry = Config::Backend::template get_metaentry(snmalloc::address_cast(p)); - if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy))) + if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy))) return; dealloc_local_object_slow(p, entry); } - SNMALLOC_FAST_PATH static bool dealloc_local_object_fast( + template + SNMALLOC_FAST_PATH bool dealloc_local_object_fast( const PagemapEntry& entry, CapPtr p, LocalEntropy& entropy) @@ -695,6 +709,10 @@ namespace snmalloc // Update the head and the next pointer in the free list. 
meta->free_queue.add(cp, key, entropy); + if constexpr (Statistics) + { + stats[entry.get_sizeclass()].objects_deallocated++; + } return SNMALLOC_LIKELY(!meta->return_object()); } @@ -741,6 +759,7 @@ namespace snmalloc } auto r = finish_alloc(p, sizeclass); + stats[sizeclass].objects_allocated++; return ticker.check_tick(r); } return small_alloc_slow(sizeclass, fast_free_list); @@ -813,6 +832,9 @@ namespace snmalloc } auto r = finish_alloc(p, sizeclass); + + stats[sizeclass].objects_allocated++; + stats[sizeclass].slabs_allocated++; return ticker.check_tick(r); } @@ -834,7 +856,7 @@ namespace snmalloc { auto p_wild = message_queue().destroy(); auto p_tame = domesticate(p_wild); - + size_t received_bytes = 0; while (p_tame != nullptr) { bool need_post = true; // Always going to post, so ignore. @@ -842,9 +864,11 @@ namespace snmalloc p_tame->atomic_read_next(RemoteAllocator::key_global, domesticate); const PagemapEntry& entry = Config::Backend::get_metaentry(snmalloc::address_cast(p_tame)); + received_bytes += sizeclass_full_to_size(entry.get_sizeclass()); handle_dealloc_remote(entry, p_tame.as_void(), need_post); p_tame = n_tame; } + RemoteDeallocCache::remote_inflight -= received_bytes; } else { @@ -986,6 +1010,11 @@ namespace snmalloc return debug_is_empty_impl(result); } + + const AllocStats& get_stats() + { + return stats; + } }; /** diff --git a/src/snmalloc/mem/globalalloc.h b/src/snmalloc/mem/globalalloc.h index dc9528f66..e618ed729 100644 --- a/src/snmalloc/mem/globalalloc.h +++ b/src/snmalloc/mem/globalalloc.h @@ -87,6 +87,9 @@ namespace snmalloc } } + if (result == nullptr) + SNMALLOC_CHECK(RemoteDeallocCache::remote_inflight.get_curr() == 0); + if (result != nullptr) { *result = okay; @@ -134,4 +137,78 @@ namespace snmalloc } } + template + inline static void get_stats(AllocStats& stats) + { + auto alloc = AllocPool::iterate(); + while (alloc != nullptr) + { + stats += alloc->get_stats(); + alloc = AllocPool::iterate(alloc); + } + } + + template + inline 
static void print_alloc_stats() + { + static std::atomic dump{0}; + + auto l_dump = dump++; + if (l_dump == 0) + { + message<1024>( + "snmalloc_allocs,dumpid,sizeclass,size,allocated,deallocated,in_use," + "bytes,slabs allocated,slabs deallocated,slabs in_use,slabs bytes"); + message<1024>( + "snmalloc_totals,dumpid,backend bytes,peak backend " + "bytes,requested,slabs requested bytes,remote inflight bytes,allocator " + "count"); + } + + AllocStats stats; + snmalloc::get_stats(stats); + size_t total_live{0}; + size_t total_live_slabs{0}; + for (size_t i = 0; i < snmalloc::SIZECLASS_REP_SIZE; i++) + { + auto sc = snmalloc::sizeclass_t::from_raw(i); + auto allocated = *stats[sc].objects_allocated; + auto deallocated = *stats[sc].objects_deallocated; + auto slabs_allocated = *stats[sc].slabs_allocated; + auto slabs_deallocated = *stats[sc].slabs_deallocated; + if (allocated == 0 && deallocated == 0) + continue; + auto size = snmalloc::sizeclass_full_to_size(sc); + auto slab_size = snmalloc::sizeclass_full_to_slab_size(sc); + auto in_use = allocated - deallocated; + auto amount = in_use * size; + total_live += amount; + auto in_use_slabs = slabs_allocated - slabs_deallocated; + auto amount_slabs = in_use_slabs * slab_size; + total_live_slabs += amount_slabs; + + snmalloc::message<1024>( + "snmalloc_allocs,{},{},{},{},{},{},{},{},{},{},{}", + l_dump, + i, + size, + allocated, + deallocated, + in_use, + amount, + slabs_allocated, + slabs_deallocated, + in_use_slabs, + amount_slabs); + } + snmalloc::message<1024>( + "snmalloc_totals,{},{},{},{},{},{},{}", + l_dump, + Config::Backend::get_current_usage(), + Config::Backend::get_peak_usage(), + total_live, + total_live_slabs, + RemoteDeallocCache::remote_inflight.get_curr(), + Config::pool().get_count()); + } } // namespace snmalloc diff --git a/src/snmalloc/mem/localalloc.h b/src/snmalloc/mem/localalloc.h index c85d30b2b..f8110e049 100644 --- a/src/snmalloc/mem/localalloc.h +++ b/src/snmalloc/mem/localalloc.h @@ -211,6 
+211,12 @@ namespace snmalloc chunk.unsafe_ptr(), bits::next_pow2(size)); } + if (chunk.unsafe_ptr() != nullptr) + { + auto sc = size_to_sizeclass_full(size); + core_alloc->stats[sc].objects_allocated++; + core_alloc->stats[sc].slabs_allocated++; + } return capptr_chunk_is_alloc(capptr_to_user_address_control(chunk)); }); } @@ -246,7 +252,7 @@ namespace snmalloc }; return local_cache.template alloc( - domesticate, size, slowpath); + domesticate, core_alloc->stats, size, slowpath); } /** @@ -418,7 +424,7 @@ namespace snmalloc message<1024>("flush(): core_alloc={}", core_alloc); #endif local_cache.remote_allocator = &Config::unused_remote; - local_cache.remote_dealloc_cache.capacity = 0; + local_cache.remote_dealloc_cache.cache_bytes = REMOTE_CACHE; } } @@ -648,7 +654,7 @@ namespace snmalloc { dealloc_cheri_checks(p_tame.unsafe_ptr()); - if (SNMALLOC_LIKELY(CoreAlloc::dealloc_local_object_fast( + if (SNMALLOC_LIKELY(core_alloc->dealloc_local_object_fast( entry, p_tame, local_cache.entropy))) return; core_alloc->dealloc_local_object_slow(p_tame, entry); diff --git a/src/snmalloc/mem/localcache.h b/src/snmalloc/mem/localcache.h index cfbbaa576..0bac2541f 100644 --- a/src/snmalloc/mem/localcache.h +++ b/src/snmalloc/mem/localcache.h @@ -1,6 +1,7 @@ #pragma once #include "../ds/ds.h" +#include "allocstats.h" #include "freelist.h" #include "remotecache.h" #include "sizeclasstable.h" @@ -94,8 +95,11 @@ namespace snmalloc typename Config, typename Slowpath, typename Domesticator> - SNMALLOC_FAST_PATH capptr::Alloc - alloc(Domesticator domesticate, size_t size, Slowpath slowpath) + SNMALLOC_FAST_PATH capptr::Alloc alloc( + Domesticator domesticate, + AllocStats& stats, + size_t size, + Slowpath slowpath) { auto& key = entropy.get_free_list_key(); smallsizeclass_t sizeclass = size_to_sizeclass(size); @@ -103,6 +107,7 @@ namespace snmalloc if (SNMALLOC_LIKELY(!fl.empty())) { auto p = fl.take(key, domesticate); + stats[sizeclass].objects_allocated++; return finish_alloc(p, 
sizeclass); } return slowpath(sizeclass, &fl); diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h index 36737207d..9dc686e43 100644 --- a/src/snmalloc/mem/pool.h +++ b/src/snmalloc/mem/pool.h @@ -34,9 +34,15 @@ namespace snmalloc FlagWord lock{}; capptr::Alloc list{nullptr}; + std::atomic count{0}; public: constexpr PoolState() = default; + + size_t get_count() + { + return count.load(std::memory_order_relaxed); + } }; /** @@ -156,6 +162,8 @@ namespace snmalloc p->list_next = pool.list; pool.list = p; + pool.count++; + p->set_in_use(); return p.unsafe_ptr(); } diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index 96f5e0973..a53737f24 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -19,14 +19,17 @@ namespace snmalloc { std::array, REMOTE_SLOTS> list; + static inline Stat remote_inflight; + /** - * The total amount of memory we are waiting for before we will dispatch - * to other allocators. Zero can mean we have not initialised the allocator - * yet. This is initialised to the 0 so that we always hit a slow path to - * start with, when we hit the slow path and need to dispatch everything, we - * can check if we are a real allocator and lazily provide a real allocator. + * The total amount of bytes of memory in the cache. + * + * REMOTE_CACHE is used as the initial value, so that we always hit a slow + * path to start with, when we hit the slow path and need to dispatch + * everything, we can check if we are a real allocator and lazily provide a + * real allocator. 
*/ - int64_t capacity{0}; + size_t cache_bytes{REMOTE_CACHE}; #ifndef NDEBUG bool initialised = false; @@ -56,13 +59,10 @@ namespace snmalloc template SNMALLOC_FAST_PATH bool reserve_space(const Entry& entry) { - auto size = - static_cast(sizeclass_full_to_size(entry.get_sizeclass())); + auto size = sizeclass_full_to_size(entry.get_sizeclass()); - bool result = capacity > size; - if (result) - capacity -= size; - return result; + cache_bytes += size; + return cache_bytes < REMOTE_CACHE; } template @@ -91,6 +91,8 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; + // We are about to post cache_bytes bytes to other allocators. + remote_inflight += cache_bytes; while (true) { auto my_slot = get_slot(id, post_round); @@ -152,7 +154,7 @@ namespace snmalloc } // Reset capacity as we have empty everything - capacity = REMOTE_CACHE; + cache_bytes = 0; return sent_something; } @@ -177,7 +179,7 @@ namespace snmalloc // a null address. l.init(0, RemoteAllocator::key_global); } - capacity = REMOTE_CACHE; + cache_bytes = 0; } }; } // namespace snmalloc diff --git a/src/test/func/cleanup/cleanup.cc b/src/test/func/cleanup/cleanup.cc new file mode 100644 index 000000000..f733e5ed0 --- /dev/null +++ b/src/test/func/cleanup/cleanup.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include + +void ecall() +{ + snmalloc::ScopedAllocator a; + std::vector allocs; + for (size_t j = 0; j < 1000; j++) + { + allocs.push_back(a.alloc.alloc(j % 1024)); + } + auto p = a.alloc.alloc(1 * 1024 * 1024); + memset(p, 0, 1 * 1024 * 1024); + + for (size_t j = 0; j < allocs.size(); j++) + a.alloc.dealloc(allocs[j]); + + a.alloc.dealloc(p); +} + +void thread_body() +{ + for (int i = 0; i < 1000; i++) + { + ecall(); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } +} + +void monitor_body() +{ + for (int i = 0; i < 60; i++) + { + std::cout << "Current: " + << snmalloc::Alloc::Config::Backend::get_current_usage() + << std::endl; + std::cout << "Peak : " + << 
snmalloc::Alloc::Config::Backend::get_peak_usage() + << std::endl; + std::cout << "Allocs : " << snmalloc::Alloc::Config::pool().get_count() + << std::endl; + std::cout << "--------------------------------------------" << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(1)); + } +} + +int main() +{ + std::vector threads; + for (int i = 0; i < 8; i++) + { + threads.push_back(std::thread(thread_body)); + } + threads.push_back(std::thread(monitor_body)); + + for (auto& t : threads) + t.join(); + return 0; +} \ No newline at end of file diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc index c8db1cad7..f5790dcce 100644 --- a/src/test/func/statistics/stats.cc +++ b/src/test/func/statistics/stats.cc @@ -18,6 +18,7 @@ void debug_check_empty_1() auto r = a.alloc(size); snmalloc::debug_check_empty(&result); + snmalloc::print_alloc_stats(); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -25,8 +26,12 @@ void debug_check_empty_1() abort(); } + snmalloc::print_alloc_stats(); + a.dealloc(r); + snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(&result); if (result != true) { @@ -34,7 +39,11 @@ void debug_check_empty_1() abort(); } - r = a.alloc(size); + snmalloc::print_alloc_stats(); + + r = a.alloc(16); + + snmalloc::print_alloc_stats(); snmalloc::debug_check_empty(&result); if (result != false) @@ -44,14 +53,20 @@ void debug_check_empty_1() abort(); } + snmalloc::print_alloc_stats(); + a.dealloc(r); + snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; abort(); } + + snmalloc::print_alloc_stats(); } template diff --git a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc new file mode 100644 index 000000000..435cf4575 --- /dev/null +++ b/src/test/perf/churn/churn.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include + +int main() 
+{ + std::vector threads; + std::atomic running{0}; + snmalloc::Stat requests; + std::atomic done{false}; + + for (size_t i = 0; i < 16; i++) + { + threads.push_back(std::thread([&running, &requests, &done]() { + std::queue q; + while (!done) + { + snmalloc::ScopedAllocator alloc; + running++; + + if (rand() % 1000 == 0) + { + // Deallocate everything in the queue + while (q.size() > 0) + { + auto p = q.front(); + requests -= *p; + alloc->dealloc(p); + q.pop(); + } + } + + for (size_t j = 0; j < 1000; j++) + { + if (q.size() >= 20000 || (q.size() > 0 && (rand() % 10 == 0))) + { + auto p = q.front(); + requests -= *p; + alloc->dealloc(p); + q.pop(); + } + else + { + size_t size = + (rand() % 1024 == 0) ? 16 * 1024 * (1 << (rand() % 3)) : 48; + requests += size; + auto p = (size_t*)alloc->alloc(size); + *p = size; + q.push(p); + } + } + + running--; + std::this_thread::sleep_for(std::chrono::microseconds(rand() % 2000)); + } + })); + } + + std::thread([&requests]() { + size_t count = 0; + while (count < 60) + { + count++; + std::this_thread::sleep_for(std::chrono::seconds(1)); + // std::cout << "Inflight: " << + // snmalloc::RemoteDeallocCache::remote_inflight << std::endl; std::cout + // << "Current reservation: " << snmalloc::Globals::get_current_usage() << + // std::endl; std::cout << "Peak reservation: " << + // snmalloc::Globals::get_peak_usage() << std::endl; std::cout << + // "Allocator count: " << snmalloc::Globals::pool().get_count() << + // std::endl; std::cout << "Running threads: " << running << + // std::endl; std::cout << "Index: " << count << std::endl; + // std::cout << "------------------------------------------" << std::endl; + std::cout << count << "," + << snmalloc::Alloc::Config::Backend::get_peak_usage() << "," + << snmalloc::Alloc::Config::Backend::get_current_usage() << "," + << requests.get_curr() << "," << requests.get_peak() << "," + << snmalloc::RemoteDeallocCache::remote_inflight.get_peak() + << "," + << 
snmalloc::RemoteDeallocCache::remote_inflight.get_curr() + << std::endl; + snmalloc::print_alloc_stats(); + } + }).join(); + + done = true; + + for (auto& t : threads) + t.join(); + + return 0; +} \ No newline at end of file