Use pool allocator in QueryHeap #6943

Open — SiarheiFedartsou wants to merge 49 commits into master from sf-pool-alloc.

Commits (49, all by SiarheiFedartsou):
4e5bf05  Jun 11, 2024  Try to use boost::fast_pool_allocator in QueryHeap
8c7b80e  Jun 11, 2024  wip
3bd897f  Jun 12, 2024  wip
0723dc0  Jul 3, 2024   wip
bbdac63  Jul 3, 2024   wip
611a3c2  Jul 3, 2024   wip
f62e917  Jul 6, 2024   wip
5f166a5  Jul 6, 2024   wip
8df282b  Jul 9, 2024   wip
c578698  Jul 9, 2024   wip
d1f04ab  Jul 9, 2024   wip
233a756  Jul 9, 2024   wip
53032e5  Jul 9, 2024   wip
49f875c  Jul 10, 2024  wip
7eb2d93  Jul 10, 2024  wip
13448e4  Jul 10, 2024  wip
f9358ed  Jul 10, 2024  wip
1037256  Jul 10, 2024  wip
6f04aa9  Jul 10, 2024  wip
6d2fc45  Jul 10, 2024  wip
abbe5e2  Jul 11, 2024  wip
7337771  Jul 11, 2024  wip
058c26e  Jul 11, 2024  wip
c5aae51  Jul 11, 2024  wip
4d940ab  Jul 11, 2024  wip
3691f90  Jul 11, 2024  wip
18b3c5f  Jul 11, 2024  wip
9ef1911  Jul 11, 2024  wip
a90e9dd  Jul 11, 2024  wip
434cab4  Jul 11, 2024  wip
a18ad91  Jul 11, 2024  wip
69bc6c0  Jul 11, 2024  wip
fb8182a  Jul 11, 2024  wip
270f187  Jul 11, 2024  wip
e9cdb31  Jul 11, 2024  wip
ac05d36  Jul 11, 2024  Update bench.cpp
e045dea  Jul 11, 2024  Use pool in std::unordered_map
21f53ed  Jul 11, 2024  Use pool in std::unordered_map
1436e96  Jul 12, 2024  wip
f18791a  Jul 12, 2024  wip
81d128b  Jul 12, 2024  wip
fdd1ca0  Jul 12, 2024  wip
70d67d0  Jul 12, 2024  wip
8bd26dc  Jul 12, 2024  wip
3096440  Jul 12, 2024  wip
6090387  Jul 12, 2024  wip
9ce059f  Jul 12, 2024  wip
bff349f  Jul 13, 2024  wip
d47012a  Jul 28, 2024  Merge branch 'master' into sf-pool-alloc
79 changes: 34 additions & 45 deletions .github/workflows/osrm-backend.yml
@@ -653,7 +653,7 @@ jobs:
   benchmarks:
     if: github.event_name == 'pull_request'
     needs: [format-taginfo-docs]
-    runs-on: ubuntu-24.04
+    runs-on: self-hosted

[Review comment — SiarheiFedartsou (Member, Author), on `runs-on: self-hosted`]
Copy-paste from #6975, just to make benchmark results more stable; I'll remove it before merge.

     env:
       CCOMPILER: clang-16
       CXXCOMPILER: clang++-16
@@ -664,37 +664,17 @@ jobs:
       GITHUB_REPOSITORY: ${{ github.repository }}
       RUN_BIG_BENCHMARK: ${{ contains(github.event.pull_request.labels.*.name, 'Performance') }}
     steps:
-      - name: Enable data.osm.pbf cache
-        if: ${{ ! env.RUN_BIG_BENCHMARK }}
-        uses: actions/cache@v4
-        with:
-          path: ~/data.osm.pbf
-          key: v1-data-osm-pbf
-          restore-keys: |
-            v1-data-osm-pbf
-      - name: Enable compiler cache
-        uses: actions/cache@v4
-        with:
-          path: ~/.ccache
-          key: v1-ccache-benchmarks-${{ github.sha }}
-          restore-keys: |
-            v1-ccache-benchmarks-
-      - name: Enable Conan cache
-        uses: actions/cache@v4
-        with:
-          path: ~/.conan
-          key: v1-conan-benchmarks-${{ github.sha }}
-          restore-keys: |
-            v1-conan-benchmarks-
       - name: Checkout PR Branch
         uses: actions/checkout@v4
         with:
          ref: ${{ github.head_ref }}
          path: pr
-      - name: Install dependencies
-        run: |
-          python3 -m pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4" --break-system-packages
-          sudo apt-get update -y && sudo apt-get install ccache
+      - name: Activate virtualenv
+        run: |
+          python3 -m venv .venv
+          source .venv/bin/activate
+          echo PATH=$PATH >> $GITHUB_ENV
+          pip install "conan<2.0.0" "requests==2.31.0" "numpy==1.26.4"
       - name: Prepare data
         run: |
           if [ "$RUN_BIG_BENCHMARK" = "true" ]; then
@@ -740,32 +720,41 @@ jobs:
           make -C test/data
       # we run benchmarks in tmpfs to avoid impact of disk IO
       - name: Create folder for tmpfs
-        run: mkdir -p /opt/benchmarks
+        run: |
+          # if by any chance it was mounted before (e.g. because a previous job failed), unmount it
+          sudo umount ~/benchmarks | true
+          rm -rf ~/benchmarks
+          mkdir -p ~/benchmarks
+      # see https://llvm.org/docs/Benchmarking.html
       - name: Run PR Benchmarks
         run: |
-          sudo mount -t tmpfs -o size=4g none /opt/benchmarks
-          cp -rf pr/build /opt/benchmarks/build
-          mkdir -p /opt/benchmarks/test
-          cp -rf pr/test/data /opt/benchmarks/test/data
-          cp -rf pr/profiles /opt/benchmarks/profiles
-
-          ./pr/scripts/ci/run_benchmarks.sh -f /opt/benchmarks -r $(pwd)/pr_results -s $(pwd)/pr -b /opt/benchmarks/build -o ~/data.osm.pbf -g ~/gps_traces.csv
-          sudo umount /opt/benchmarks
+          sudo cset shield -c 2-3 -k on
+          sudo mount -t tmpfs -o size=4g none ~/benchmarks
+          cp -rf pr/build ~/benchmarks/build
+          mkdir -p ~/benchmarks/test
+          cp -rf pr/test/data ~/benchmarks/test/data
+          cp -rf pr/profiles ~/benchmarks/profiles
+
+          sudo cset shield --exec -- ./pr/scripts/ci/run_benchmarks.sh -f ~/benchmarks -r $(pwd)/pr_results -s $(pwd)/pr -b ~/benchmarks/build -o ~/data.osm.pbf -g ~/gps_traces.csv
+          sudo umount ~/benchmarks
+          sudo cset shield --reset
       - name: Run Base Benchmarks
         run: |
-          sudo mount -t tmpfs -o size=4g none /opt/benchmarks
-          cp -rf base/build /opt/benchmarks/build
-          mkdir -p /opt/benchmarks/test
-          cp -rf base/test/data /opt/benchmarks/test/data
-          cp -rf base/profiles /opt/benchmarks/profiles
+          sudo cset shield -c 2-3 -k on
+          sudo mount -t tmpfs -o size=4g none ~/benchmarks
+          cp -rf base/build ~/benchmarks/build
+          mkdir -p ~/benchmarks/test
+          cp -rf base/test/data ~/benchmarks/test/data
+          cp -rf base/profiles ~/benchmarks/profiles

           # TODO: remove this when the base branch has this file at the needed location
-          if [ ! -f /opt/benchmarks/test/data/portugal_to_korea.json ]; then
-            cp base/src/benchmarks/portugal_to_korea.json /opt/benchmarks/test/data/portugal_to_korea.json
+          if [ ! -f ~/benchmarks/test/data/portugal_to_korea.json ]; then
+            cp base/src/benchmarks/portugal_to_korea.json ~/benchmarks/test/data/portugal_to_korea.json
           fi
           # we intentionally use scripts from the PR branch to be able to update them and see results in the same PR
-          ./pr/scripts/ci/run_benchmarks.sh -f /opt/benchmarks -r $(pwd)/base_results -s $(pwd)/pr -b /opt/benchmarks/build -o ~/data.osm.pbf -g ~/gps_traces.csv
-          sudo umount /opt/benchmarks
+          sudo cset shield --exec -- ./pr/scripts/ci/run_benchmarks.sh -f ~/benchmarks -r $(pwd)/base_results -s $(pwd)/pr -b ~/benchmarks/build -o ~/data.osm.pbf -g ~/gps_traces.csv
+          sudo umount ~/benchmarks
+          sudo cset shield --reset
       - name: Post Benchmark Results
         run: |
           python3 pr/scripts/ci/post_benchmark_results.py base_results pr_results
4 changes: 2 additions & 2 deletions include/customizer/cell_customizer.hpp
@@ -116,8 +116,8 @@ class CellCustomizer
                    const std::vector<bool> &allowed_nodes,
                    CellMetric &metric) const
    {
-        Heap heap_exemplar(graph.GetNumberOfNodes());
-        HeapPtr heaps(heap_exemplar);
+        const auto number_of_nodes = graph.GetNumberOfNodes();
+        HeapPtr heaps([number_of_nodes] { return Heap{number_of_nodes}; });

[Review comment — SiarheiFedartsou (Member, Author)]
This used to rely on TBB copying heap_exemplar into each thread. Since QueryHeap now uses a thread-local allocator, that copy is unsafe: the copied QueryHeap contains a PoolAllocator that still points at the original thread's memory pool, so the copy would use that pool from a different thread. With this change, each thread constructs its own heap.

        for (std::size_t level = 1; level < partition.GetNumberOfLevels(); ++level)
        {
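For illustration, a minimal, self-contained sketch of the two construction patterns. It assumes HeapPtr is an alias for tbb::enumerable_thread_specific<Heap> — an assumption consistent with the exemplar/factory constructors used here, but not shown in this diff — and Heap is a stand-in struct, not the real QueryHeap:

#include <tbb/enumerable_thread_specific.h>
#include <cstddef>

struct Heap
{
    explicit Heap(std::size_t nodes) : size(nodes) {}
    std::size_t size;
};

int main()
{
    const std::size_t number_of_nodes = 1000;

    // Old pattern: TBB copies one exemplar into every worker thread. With the new
    // thread-local PoolAllocator inside QueryHeap, every copy would keep pointing
    // at the constructing thread's memory pool — a cross-thread hazard.
    Heap heap_exemplar(number_of_nodes);
    tbb::enumerable_thread_specific<Heap> copied_heaps(heap_exemplar);

    // New pattern from this PR: the factory lambda runs on each worker thread, so
    // every heap is built (and allocates) on the thread that actually uses it.
    tbb::enumerable_thread_specific<Heap> per_thread_heaps([number_of_nodes]
                                                           { return Heap{number_of_nodes}; });
}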
158 changes: 158 additions & 0 deletions include/util/pool_allocator.hpp
@@ -0,0 +1,158 @@
#pragma once

#include <algorithm>
#include <array>
#include <bit>
#include <boost/assert.hpp>
#include <cstddef>
#include <cstdlib>
#include <memory>
#include <mutex>
#include <new>
#include <vector>

namespace osrm::util
{

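// Rounds n up to the next multiple of `alignment` (which must be a power of two).
// Example: align_up(13, 8) == 16.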
inline size_t align_up(size_t n, size_t alignment)
{
return (n + alignment - 1) & ~(alignment - 1);
}

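// Returns the smallest e such that 2^e >= n, i.e. the free-list index for an
// allocation of n bytes. Example: get_next_power_of_two_exponent(48) == 6 (2^6 == 64).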
inline size_t get_next_power_of_two_exponent(size_t n)
{
BOOST_ASSERT(n > 0);
return (sizeof(size_t) * 8) - std::countl_zero(n - 1);
}

class MemoryPool
{
private:
constexpr static size_t MIN_CHUNK_SIZE_BYTES = 4096;

public:
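// One pool per thread: each thread allocates from, and returns blocks to, its own instance.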
static std::shared_ptr<MemoryPool> instance()
{
static thread_local std::shared_ptr<MemoryPool> instance;
if (!instance)
{
instance = std::shared_ptr<MemoryPool>(new MemoryPool());
}
return instance;
}

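// Allocation strategy: round the request up to the next power of two and serve it
// from the matching free list; if that list is empty, carve a new block out of the
// current chunk (bump-pointer allocation), malloc'ing a fresh chunk when needed.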
template <typename T> T *allocate(std::size_t items_count)
{
static_assert(alignof(T) <= alignof(std::max_align_t),
"Type is over-aligned for this allocator.");

size_t free_list_index = get_next_power_of_two_exponent(items_count * sizeof(T));
auto &free_list = free_lists_[free_list_index];
if (free_list.empty())
{
size_t block_size_in_bytes = size_t{1} << free_list_index;
block_size_in_bytes = align_up(block_size_in_bytes, alignof(std::max_align_t));
// check if there is space in current memory chunk
if (current_chunk_left_bytes_ < block_size_in_bytes)
{
allocate_chunk(block_size_in_bytes);
}

free_list.push_back(current_chunk_ptr_);
current_chunk_left_bytes_ -= block_size_in_bytes;
current_chunk_ptr_ += block_size_in_bytes;
}
auto ptr = reinterpret_cast<T *>(free_list.back());
free_list.pop_back();
return ptr;
}

template <typename T> void deallocate(T *p, std::size_t n) noexcept
{
size_t free_list_index = get_next_power_of_two_exponent(n * sizeof(T));
// NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
free_lists_[free_list_index].push_back(reinterpret_cast<void *>(p));
}

~MemoryPool()
{
for (auto chunk : chunks_)
{
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
std::free(chunk);
}
}

private:
MemoryPool() = default;
MemoryPool(const MemoryPool &) = delete;
MemoryPool &operator=(const MemoryPool &) = delete;

void allocate_chunk(size_t bytes)
{
auto chunk_size = std::max(bytes, MIN_CHUNK_SIZE_BYTES);
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
void *chunk = std::malloc(chunk_size);
if (!chunk)
{
throw std::bad_alloc();
}
chunks_.push_back(chunk);
current_chunk_ptr_ = static_cast<uint8_t *>(chunk);
current_chunk_left_bytes_ = chunk_size;
}

// we have 64 free lists, one for each possible power of two
std::array<std::vector<void *>, sizeof(std::size_t) * 8> free_lists_;

// list of allocated memory chunks, we don't free them until the pool is destroyed
std::vector<void *> chunks_;

uint8_t *current_chunk_ptr_ = nullptr;
size_t current_chunk_left_bytes_ = 0;
};

template <typename T> class PoolAllocator
{
public:
using value_type = T;

PoolAllocator() noexcept : pool(MemoryPool::instance()) {}

template <typename U>
PoolAllocator(const PoolAllocator<U> &) noexcept : pool(MemoryPool::instance())
{
}

template <typename U> struct rebind
{
using other = PoolAllocator<U>;
};

T *allocate(std::size_t n) { return pool->allocate<T>(n); }

void deallocate(T *p, std::size_t n) noexcept { pool->deallocate<T>(p, n); }

PoolAllocator(const PoolAllocator &) = default;
PoolAllocator &operator=(const PoolAllocator &) = default;
PoolAllocator(PoolAllocator &&) noexcept = default;
PoolAllocator &operator=(PoolAllocator &&) noexcept = default;

private:
// using shared_ptr guarantees that memory pool won't be destroyed before all allocators using
// it (important if there are static instances of PoolAllocator)
std::shared_ptr<MemoryPool> pool;
};
template <typename T, typename U>
bool operator==(const PoolAllocator<T> &, const PoolAllocator<U> &)
{
return true;
}

template <typename T, typename U>
bool operator!=(const PoolAllocator<T> &, const PoolAllocator<U> &)
{
return false;
}

} // namespace osrm::util
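To show how the pieces fit together, a minimal, hypothetical usage sketch (not part of the PR — the container types are standard, only PoolAllocator comes from this header):

#include "util/pool_allocator.hpp"

#include <unordered_map>
#include <vector>

using osrm::util::PoolAllocator;

int main()
{
    // Every allocation below is served by the calling thread's MemoryPool and,
    // on deallocation, recycled through its power-of-two free lists.
    std::vector<int, PoolAllocator<int>> values;
    values.reserve(1000); // one pool allocation: 4000 bytes, rounded up to the 4096-byte size class

    std::unordered_map<int,
                       int,
                       std::hash<int>,
                       std::equal_to<int>,
                       PoolAllocator<std::pair<const int, int>>>
        map;
    map[42] = 1; // bucket arrays and nodes come from the pool too

    // Freed blocks return to the thread-local free lists; the underlying chunks
    // are released only when the thread's MemoryPool itself is destroyed.
    return 0;
}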
18 changes: 13 additions & 5 deletions include/util/query_heap.hpp
@@ -1,17 +1,16 @@
 #ifndef OSRM_UTIL_QUERY_HEAP_HPP
 #define OSRM_UTIL_QUERY_HEAP_HPP

+#include "util/pool_allocator.hpp"
+#include <algorithm>
 #include <boost/assert.hpp>
 #include <boost/heap/d_ary_heap.hpp>

-#include <algorithm>
 #include <cstdint>
 #include <limits>
 #include <map>
 #include <optional>
 #include <unordered_map>
 #include <vector>

 namespace osrm::util
 {
@@ -56,7 +55,11 @@ template <typename NodeID, typename Key> class UnorderedMapStorage
     void Clear() { nodes.clear(); }

   private:
-    std::unordered_map<NodeID, Key> nodes;
+    template <typename K, typename V>
+    using UnorderedMap = std::
+        unordered_map<K, V, std::hash<K>, std::equal_to<K>, PoolAllocator<std::pair<const K, V>>>;
+
+    UnorderedMap<NodeID, Key> nodes;
 };

template <typename NodeID,
@@ -142,10 +145,12 @@ class QueryHeap
             return weight > other.weight;
         }
     };
+
     using HeapContainer = boost::heap::d_ary_heap<HeapData,
                                                   boost::heap::arity<4>,
                                                   boost::heap::mutable_<true>,
-                                                  boost::heap::compare<std::greater<HeapData>>>;
+                                                  boost::heap::compare<std::greater<HeapData>>,
+                                                  boost::heap::allocator<PoolAllocator<HeapData>>>;
     using HeapHandle = typename HeapContainer::handle_type;

   public:
@@ -160,6 +165,9 @@ class QueryHeap
         Data data;
     };

+    QueryHeap(const QueryHeap &other) = delete;
+    QueryHeap(QueryHeap &&other) = delete;
+
     template <typename... StorageArgs> explicit QueryHeap(StorageArgs... args) : node_index(args...)
     {
         Clear();
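The deleted copy and move constructors added above turn the cross-thread copying described in the cell_customizer comment into a compile-time error rather than a latent bug. A hypothetical sketch, where ConcreteHeap stands for any concrete QueryHeap instantiation:

// ConcreteHeap is a placeholder for a concrete util::QueryHeap instantiation.
ConcreteHeap heap(number_of_nodes);      // fine: built on the thread that uses it
// ConcreteHeap copy = heap;             // error: copy constructor is deleted
// ConcreteHeap moved = std::move(heap); // error: move constructor is deleted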