diff --git a/FEXCore/Source/Utils/Allocator.cpp b/FEXCore/Source/Utils/Allocator.cpp index 58311a3744..b254945c2e 100644 --- a/FEXCore/Source/Utils/Allocator.cpp +++ b/FEXCore/Source/Utils/Allocator.cpp @@ -112,14 +112,18 @@ void ReenableSBRKAllocations(void* Ptr) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" -void SetupHooks() { - Alloc64 = Alloc::OSAllocator::Create64BitAllocator(); +static void AssignHookOverrides() { SetJemallocMmapHook(FEX_mmap); SetJemallocMunmapHook(FEX_munmap); FEXCore::Allocator::mmap = FEX_mmap; FEXCore::Allocator::munmap = FEX_munmap; } +void SetupHooks() { + Alloc64 = Alloc::OSAllocator::Create64BitAllocator(); + AssignHookOverrides(); +} + void ClearHooks() { SetJemallocMmapHook(::mmap); SetJemallocMunmapHook(::munmap); @@ -300,7 +304,7 @@ fextl::vector StealMemoryRegion(uintptr_t Begin, uintptr_t End) { return Regions; } -fextl::vector Steal48BitVA() { +fextl::vector Setup48BitAllocatorIfExists() { size_t Bits = FEXCore::Allocator::DetermineVASize(); if (Bits < 48) { return {}; @@ -308,7 +312,12 @@ fextl::vector Steal48BitVA() { uintptr_t Begin48BitVA = 0x0'8000'0000'0000ULL; uintptr_t End48BitVA = 0x1'0000'0000'0000ULL; - return StealMemoryRegion(Begin48BitVA, End48BitVA); + auto Regions = StealMemoryRegion(Begin48BitVA, End48BitVA); + + Alloc64 = Alloc::OSAllocator::Create64BitAllocatorWithRegions(Regions); + AssignHookOverrides(); + + return Regions; } void ReclaimMemoryRegion(const fextl::vector& Regions) { diff --git a/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp b/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp index 60d6545c05..928ca9cd94 100644 --- a/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp +++ b/FEXCore/Source/Utils/Allocator/64BitAllocator.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,6 +37,8 @@ thread_local FEXCore::Core::InternalThreadState* TLSThread {}; class OSAllocator_64Bit final : public 
Alloc::HostAllocator { public: OSAllocator_64Bit(); + OSAllocator_64Bit(fextl::vector& Regions); + virtual ~OSAllocator_64Bit(); void* AllocateSlab(size_t Size) override { return nullptr; @@ -99,19 +103,20 @@ class OSAllocator_64Bit final : public Alloc::HostAllocator { // This returns the size of the LiveVMARegion in addition to the flex set that tracks the used data // The LiveVMARegion lives at the start of the VMA region which means on initialization we need to set that // tracked ranged as used immediately - static size_t GetSizeWithFlexSet(size_t Size) { + static size_t GetFEXManagedVMARegionSize(size_t Size) { // One element per page // 0x10'0000'0000 bytes // 0x100'0000 Pages // 1 bit per page for tracking means 0x20'0000 (Pages / 8) bytes of flex space // Which is 2MB of tracking - uint64_t NumElements = (Size >> FEXCore::Utils::FEX_PAGE_SHIFT) * sizeof(FlexBitElementType); - return sizeof(LiveVMARegion) + FEXCore::FlexBitSet::Size(NumElements); + const uint64_t NumElements = Size >> FEXCore::Utils::FEX_PAGE_SHIFT; + return sizeof(LiveVMARegion) + FEXCore::FlexBitSet::SizeInBytes(NumElements); } static void InitializeVMARegionUsed(LiveVMARegion* Region, size_t AdditionalSize) { - size_t SizeOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(Region->SlabInfo->RegionSize), FEXCore::Utils::FEX_PAGE_SIZE); + size_t SizeOfLiveRegion = + FEXCore::AlignUp(LiveVMARegion::GetFEXManagedVMARegionSize(Region->SlabInfo->RegionSize), FEXCore::Utils::FEX_PAGE_SIZE); size_t SizePlusManagedData = SizeOfLiveRegion + AdditionalSize; Region->FreeSpace = Region->SlabInfo->RegionSize - SizePlusManagedData; @@ -155,7 +160,8 @@ class OSAllocator_64Bit final : public Alloc::HostAllocator { ReservedRegions->erase(ReservedIterator); // mprotect the new region we've allocated - size_t SizeOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(ReservedRegion->RegionSize), FEXCore::Utils::FEX_PAGE_SIZE); + size_t SizeOfLiveRegion = + 
FEXCore::AlignUp(LiveVMARegion::GetFEXManagedVMARegionSize(ReservedRegion->RegionSize), FEXCore::Utils::FEX_PAGE_SIZE); size_t SizePlusManagedData = UsedSize + SizeOfLiveRegion; [[maybe_unused]] auto Res = mprotect(reinterpret_cast(ReservedRegion->Base), SizePlusManagedData, PROT_READ | PROT_WRITE); @@ -180,7 +186,7 @@ class OSAllocator_64Bit final : public Alloc::HostAllocator { // 32-bit old kernel workarounds fextl::vector Steal32BitIfOldKernel(); - void AllocateMemoryRegions(const fextl::vector& Ranges); + void AllocateMemoryRegions(fextl::vector& Ranges); LiveVMARegion* FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd); }; @@ -383,7 +389,7 @@ void* OSAllocator_64Bit::Mmap(void* addr, size_t length, int prot, int flags, in if (!LiveRegion) { // Couldn't find a fit in the live regions // Allocate a new reserved region - size_t lengthOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetSizeWithFlexSet(length), FEXCore::Utils::FEX_PAGE_SIZE); + size_t lengthOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetFEXManagedVMARegionSize(length), FEXCore::Utils::FEX_PAGE_SIZE); size_t lengthPlusManagedData = length + lengthOfLiveRegion; for (auto it = ReservedRegions->begin(); it != ReservedRegions->end(); ++it) { if ((*it)->RegionSize >= lengthPlusManagedData) { @@ -515,27 +521,43 @@ fextl::vector OSAllocator_64Bit::Steal32BitIfO return FEXCore::Allocator::StealMemoryRegion(LOWER_BOUND_32, UPPER_BOUND_32); } -void OSAllocator_64Bit::AllocateMemoryRegions(const fextl::vector& Ranges) { - for (auto [Ptr, AllocationSize] : Ranges) { - if (!ObjectAlloc) { - auto MaxSize = std::min(size_t(64) * 1024 * 1024, AllocationSize); +void OSAllocator_64Bit::AllocateMemoryRegions(fextl::vector& Ranges) { + // Need to allocate the ObjectAlloc up front. Find a region that is larger than our minimum size first. 
+ const size_t ObjectAllocSize = 64 * 1024 * 1024; + + for (auto& it : Ranges) { + if (ObjectAllocSize > it.Size) { + continue; + } - // Allocate up to 64 MiB the first allocation for an intrusive allocator - mprotect(Ptr, MaxSize, PROT_READ | PROT_WRITE); + // Allocate up to 64 MiB the first allocation for an intrusive allocator + mprotect(it.Ptr, ObjectAllocSize, PROT_READ | PROT_WRITE); - // This enables the kernel to use transparent large pages in the allocator which can reduce memory pressure - ::madvise(Ptr, MaxSize, MADV_HUGEPAGE); + // This enables the kernel to use transparent large pages in the allocator which can reduce memory pressure + ::madvise(it.Ptr, ObjectAllocSize, MADV_HUGEPAGE); - ObjectAlloc = new (Ptr) Alloc::ForwardOnlyIntrusiveArenaAllocator(Ptr, MaxSize); - ReservedRegions = ObjectAlloc->new_construct(ReservedRegions, ObjectAlloc); - LiveRegions = ObjectAlloc->new_construct(LiveRegions, ObjectAlloc); + ObjectAlloc = new (it.Ptr) Alloc::ForwardOnlyIntrusiveArenaAllocator(it.Ptr, ObjectAllocSize); + ReservedRegions = ObjectAlloc->new_construct(ReservedRegions, ObjectAlloc); + LiveRegions = ObjectAlloc->new_construct(LiveRegions, ObjectAlloc); - if (AllocationSize > MaxSize) { - AllocationSize -= MaxSize; - (uint8_t*&)Ptr += MaxSize; - } else { - continue; - } + if (it.Size >= ObjectAllocSize) { + // Modify region size + it.Size -= ObjectAllocSize; + (uint8_t*&)it.Ptr += ObjectAllocSize; + } + + break; + } + + if (!ObjectAlloc) { + ERROR_AND_DIE_FMT("Couldn't allocate object allocator!"); + } + + for (auto [Ptr, AllocationSize] : Ranges) { + // Skip using any regions that are <= two pages. FEX's VMA allocator requires two pages + // for tracking data. So three pages are minimum for a single page VMA allocation. 
+ if (AllocationSize <= (FEXCore::Utils::FEX_PAGE_SIZE * 2)) { + continue; } ReservedVMARegion* Region = ObjectAlloc->new_construct(); @@ -557,6 +579,10 @@ OSAllocator_64Bit::OSAllocator_64Bit() { FEXCore::Allocator::ReclaimMemoryRegion(LowMem); } +OSAllocator_64Bit::OSAllocator_64Bit(fextl::vector& Regions) { + AllocateMemoryRegions(Regions); +} + OSAllocator_64Bit::~OSAllocator_64Bit() { // This needs a mutex to be thread safe auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(AllocationMutex, TLSThread); @@ -576,6 +602,63 @@ OSAllocator_64Bit::~OSAllocator_64Bit() { fextl::unique_ptr Create64BitAllocator() { return fextl::make_unique(); } + +template +struct alloc_delete : public std::default_delete { + void operator()(T* ptr) const { + if (ptr) { + const auto size = sizeof(T); + const auto MinPage = FEXCore::AlignUp(size, FEXCore::Utils::FEX_PAGE_SIZE); + + std::destroy_at(ptr); + ::munmap(ptr, MinPage); + } + } + + template + requires (std::is_base_of_v) + operator fextl::default_delete() { + return fextl::default_delete(); + } +}; + +template +requires (!std::is_array_v) +fextl::unique_ptr make_alloc_unique(FEXCore::Allocator::MemoryRegion& Base, Args&&... args) { + const auto size = sizeof(T); + const auto MinPage = FEXCore::AlignUp(size, FEXCore::Utils::FEX_PAGE_SIZE); + // A whole page is mapped at Base.Ptr and carved out of Base below, so Base must hold MinPage bytes, not just sizeof(T). + if (Base.Size < MinPage || MinPage != FEXCore::Utils::FEX_PAGE_SIZE) { + ERROR_AND_DIE_FMT("Couldn't fit allocator in to page!"); + } + + auto ptr = ::mmap(Base.Ptr, MinPage, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (ptr == MAP_FAILED) { + ERROR_AND_DIE_FMT("Couldn't allocate memory region"); + } + + // Remove the page from the base region. + // Could be zero after this.
+ Base.Size -= MinPage; + Base.Ptr = reinterpret_cast(reinterpret_cast(Base.Ptr) + MinPage); + + auto Result = ::new (ptr) T(std::forward(args)...); + return fextl::unique_ptr>(Result); +} + +fextl::unique_ptr Create64BitAllocatorWithRegions(fextl::vector& Regions) { + // This is a bit tricky as we can't allocate memory safely except from the Regions provided. Otherwise we might overwrite memory pages we + // don't own. Scan the memory regions and find the smallest one. Track it by pointer: assigning through a reference would copy into Regions[0] instead of re-seating it. + FEXCore::Allocator::MemoryRegion* Smallest = &Regions[0]; + for (auto& it : Regions) { + if (it.Size <= Smallest->Size) { + Smallest = &it; + } + } + + return make_alloc_unique(*Smallest, Regions); +} + } // namespace Alloc::OSAllocator namespace FEXCore::Allocator { diff --git a/FEXCore/Source/Utils/Allocator/FlexBitSet.h b/FEXCore/Source/Utils/Allocator/FlexBitSet.h index 57fe6280da..f15de92582 100644 --- a/FEXCore/Source/Utils/Allocator/FlexBitSet.h +++ b/FEXCore/Source/Utils/Allocator/FlexBitSet.h @@ -145,8 +145,14 @@ struct FlexBitSet final { return Get(Element); } - static size_t Size(uint64_t Elements) { - return FEXCore::AlignUp(Elements / MinimumSizeBits, MinimumSizeBits); + // Returns the number of bits required to hold the number of elements. + // Just rounds up to the MinimumSizeInBits. + constexpr static size_t SizeInBits(uint64_t Elements) { + return FEXCore::AlignUp(Elements, MinimumSizeBits); + } + // Returns the number of bytes required to hold the number of elements.
+ constexpr static size_t SizeInBytes(uint64_t Elements) { + return SizeInBits(Elements) / 8; } }; diff --git a/FEXCore/Source/Utils/Allocator/HostAllocator.h b/FEXCore/Source/Utils/Allocator/HostAllocator.h index 445722de7b..a3e4648923 100644 --- a/FEXCore/Source/Utils/Allocator/HostAllocator.h +++ b/FEXCore/Source/Utils/Allocator/HostAllocator.h @@ -2,9 +2,10 @@ #pragma once #include #include +#include +#include #include -#include #include namespace FEXCore::Core { @@ -49,4 +50,5 @@ class GlobalAllocator { namespace Alloc::OSAllocator { fextl::unique_ptr Create64BitAllocator(); +fextl::unique_ptr Create64BitAllocatorWithRegions(fextl::vector& Regions); } // namespace Alloc::OSAllocator diff --git a/FEXCore/include/FEXCore/Utils/Allocator.h b/FEXCore/include/FEXCore/Utils/Allocator.h index 795aff1aaa..524288242f 100644 --- a/FEXCore/include/FEXCore/Utils/Allocator.h +++ b/FEXCore/include/FEXCore/Utils/Allocator.h @@ -86,7 +86,7 @@ FEX_DEFAULT_VISIBILITY void ReclaimMemoryRegion(const fextl::vector Steal48BitVA(); +FEX_DEFAULT_VISIBILITY fextl::vector Setup48BitAllocatorIfExists(); #ifndef _WIN32 FEX_DEFAULT_VISIBILITY void RegisterTLSData(FEXCore::Core::InternalThreadState* Thread); diff --git a/FEXCore/unittests/APITests/FlexBitSet.cpp b/FEXCore/unittests/APITests/FlexBitSet.cpp new file mode 100644 index 0000000000..98355e9b09 --- /dev/null +++ b/FEXCore/unittests/APITests/FlexBitSet.cpp @@ -0,0 +1,41 @@ +#include +#include + +#include "Utils/Allocator/FlexBitSet.h" + +TEST_CASE("FlexBitSet - Sizing") { + // Ensure that FlexBitSet sizing is correct. + + // Size of zero shouldn't take any space. 
+ CHECK(FEXCore::FlexBitSet::SizeInBytes(0) == 0); + CHECK(FEXCore::FlexBitSet::SizeInBytes(0) == 0); + CHECK(FEXCore::FlexBitSet::SizeInBytes(0) == 0); + CHECK(FEXCore::FlexBitSet::SizeInBytes(0) == 0); + + CHECK(FEXCore::FlexBitSet::SizeInBits(0) == 0); + CHECK(FEXCore::FlexBitSet::SizeInBits(0) == 0); + CHECK(FEXCore::FlexBitSet::SizeInBits(0) == 0); + CHECK(FEXCore::FlexBitSet::SizeInBits(0) == 0); + + // Size of 1 should take one sizeof(ElementSize) size + CHECK(FEXCore::FlexBitSet::SizeInBytes(1) == sizeof(uint8_t)); + CHECK(FEXCore::FlexBitSet::SizeInBytes(1) == sizeof(uint16_t)); + CHECK(FEXCore::FlexBitSet::SizeInBytes(1) == sizeof(uint32_t)); + CHECK(FEXCore::FlexBitSet::SizeInBytes(1) == sizeof(uint64_t)); + + CHECK(FEXCore::FlexBitSet::SizeInBits(1) == sizeof(uint8_t) * 8); + CHECK(FEXCore::FlexBitSet::SizeInBits(1) == sizeof(uint16_t) * 8); + CHECK(FEXCore::FlexBitSet::SizeInBits(1) == sizeof(uint32_t) * 8); + CHECK(FEXCore::FlexBitSet::SizeInBits(1) == sizeof(uint64_t) * 8); + + // Size of `sizeof(ElementSize) * 8` should take one sizeof(ElementSize) size + CHECK(FEXCore::FlexBitSet::SizeInBytes(sizeof(uint8_t) * 8) == sizeof(uint8_t)); + CHECK(FEXCore::FlexBitSet::SizeInBytes(sizeof(uint16_t) * 8) == sizeof(uint16_t)); + CHECK(FEXCore::FlexBitSet::SizeInBytes(sizeof(uint32_t) * 8) == sizeof(uint32_t)); + CHECK(FEXCore::FlexBitSet::SizeInBytes(sizeof(uint64_t) * 8) == sizeof(uint64_t)); + + CHECK(FEXCore::FlexBitSet::SizeInBits(sizeof(uint8_t) * 8) == sizeof(uint8_t) * 8); + CHECK(FEXCore::FlexBitSet::SizeInBits(sizeof(uint16_t) * 8) == sizeof(uint16_t) * 8); + CHECK(FEXCore::FlexBitSet::SizeInBits(sizeof(uint32_t) * 8) == sizeof(uint32_t) * 8); + CHECK(FEXCore::FlexBitSet::SizeInBits(sizeof(uint64_t) * 8) == sizeof(uint64_t) * 8); +} diff --git a/Source/Tools/FEXLoader/FEXLoader.cpp b/Source/Tools/FEXLoader/FEXLoader.cpp index dd0aec576c..53bf90cf41 100644 --- a/Source/Tools/FEXLoader/FEXLoader.cpp +++ b/Source/Tools/FEXLoader/FEXLoader.cpp @@ -496,7 
+496,7 @@ int main(int argc, char** argv, char** const envp) { if (Loader.Is64BitMode()) { // Destroy the 48th bit if it exists - Base48Bit = FEXCore::Allocator::Steal48BitVA(); + Base48Bit = FEXCore::Allocator::Setup48BitAllocatorIfExists(); } else { // Reserve [0x1_0000_0000, 0x2_0000_0000). // Safety net if 32-bit address calculation overflows in to 64-bit range.