# Commentary (text before the first "diff --git" line is not part of any hunk).
# CMakeLists.txt changes carried by this patch:
#  - simulator/cache_miss_analyzer.cpp is added to the sources of the
#    drmemtrace_simulator static library.
#  - simulator/cache_simulator.h is added to the installed client headers.
#  - Inside the BUILD_TESTS section, under
#    "if (NOT AARCH64 AND NOT APPLE AND NOT DISABLE_FOR_BUG_2949)", a new
#    executable tool.drcachesim.miss_analyzer_unit_test is built from
#    tests/cache_miss_analyzer_test.cpp, linked against drmemtrace_simulator,
#    drmemtrace_static and drmemtrace_analyzer (plus ${ZLIB_LIBRARIES} when
#    ZLIB_FOUND), and registered with add_test().
# NOTE(review): add_test() is given only NAME and COMMAND here, so presumably the
# test result is decided by the executable's exit status -- confirm against the
# test program's main().
# NOTE(review): this patch text has its line breaks collapsed; the per-hunk line
# counts in the "@@" headers describe the original, uncollapsed patch.
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 4352a0bca53..d3bad4d7c48 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -93,6 +93,7 @@ add_exported_library(drmemtrace_simulator STATIC simulator/cache.cpp simulator/cache_lru.cpp simulator/cache_fifo.cpp + simulator/cache_miss_analyzer.cpp simulator/caching_device.cpp simulator/caching_device_stats.cpp simulator/cache_stats.cpp @@ -174,6 +175,7 @@ install_client_nonDR_header(drmemtrace tools/histogram_create.h) install_client_nonDR_header(drmemtrace tools/reuse_time_create.h) install_client_nonDR_header(drmemtrace tools/basic_counts_create.h) install_client_nonDR_header(drmemtrace tools/opcode_mix_create.h) +install_client_nonDR_header(drmemtrace simulator/cache_simulator.h) install_client_nonDR_header(drmemtrace simulator/cache_simulator_create.h) install_client_nonDR_header(drmemtrace simulator/tlb_simulator_create.h) install_client_nonDR_header(drmemtrace tracer/raw2trace.h) @@ -443,10 +445,24 @@ if (BUILD_TESTS) add_win32_flags(tool.drcachesim.unit_tests) add_test(NAME tool.drcachesim.unit_tests COMMAND tool.drcachesim.unit_tests) + # FIXME i#2007: fails to link on A64 # XXX i#1997: dynamorio_static is not supported on Mac yet # FIXME i#2949: gcc 7.3 fails to link certain configs if (NOT AARCH64 AND NOT APPLE AND NOT DISABLE_FOR_BUG_2949) + # Tests for the cache miss analyzer. 
+ add_executable(tool.drcachesim.miss_analyzer_unit_test tests/cache_miss_analyzer_test.cpp) + if (ZLIB_FOUND) + target_link_libraries(tool.drcachesim.miss_analyzer_unit_test drmemtrace_simulator + drmemtrace_static drmemtrace_analyzer ${ZLIB_LIBRARIES}) + else () + target_link_libraries(tool.drcachesim.miss_analyzer_unit_test drmemtrace_simulator + drmemtrace_static drmemtrace_analyzer) + endif () + add_win32_flags(tool.drcachesim.miss_analyzer_unit_test) + add_test(NAME tool.drcachesim.miss_analyzer_unit_test + COMMAND tool.drcachesim.miss_analyzer_unit_test) + add_executable(tool.drcacheoff.burst_static tests/burst_static.cpp) configure_DynamoRIO_static(tool.drcacheoff.burst_static) use_DynamoRIO_static_client(tool.drcacheoff.burst_static drmemtrace_static) diff --git a/clients/drcachesim/simulator/cache_miss_analyzer.cpp b/clients/drcachesim/simulator/cache_miss_analyzer.cpp new file mode 100644 index 00000000000..c9044cb8d3e --- /dev/null +++ b/clients/drcachesim/simulator/cache_miss_analyzer.cpp @@ -0,0 +1,173 @@ +/* ********************************************************** + * Copyright (c) 2015-2018 Google, LLC All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, LLC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include "cache_miss_analyzer.h"
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <unordered_map>
+#include <vector>
+
+const char *cache_miss_stats_t::kNTA = "nta";
+const char *cache_miss_stats_t::kT0 = "t0";
+
+// Creates a heap-allocated cache_miss_analyzer_t and returns it as an
+// analysis_tool_t pointer.
+// The parameter list matches the declaration in cache_simulator_create.h, which
+// takes miss_count_threshold as uint64_t. Defining this function with a
+// different integer type would create an unrelated overload and leave the
+// declared function without a definition.
+analysis_tool_t *
+cache_miss_analyzer_create(const cache_simulator_knobs_t &knobs,
+                           uint64_t miss_count_threshold, double miss_frac_threshold,
+                           double confidence_threshold)
+{
+    // The analyzer stores the threshold as unsigned int: saturate rather than
+    // silently wrap when the caller passes a larger value.
+    const unsigned int count_threshold = miss_count_threshold > UINT_MAX
+        ? UINT_MAX
+        : static_cast<unsigned int>(miss_count_threshold);
+    return new cache_miss_analyzer_t(knobs, count_threshold, miss_frac_threshold,
+                                     confidence_threshold);
+}
+
+// Stores the analysis thresholds and the cache line size; see the header for
+// the meaning of each parameter.
+cache_miss_stats_t::cache_miss_stats_t(bool warmup_enabled, unsigned int line_size,
+                                       unsigned int miss_count_threshold,
+                                       double miss_frac_threshold,
+                                       double confidence_threshold)
+    : cache_stats_t("", warmup_enabled)
+    , kLineSize(line_size)
+    , kMissCountThreshold(miss_count_threshold)
+    , kMissFracThreshold(miss_frac_threshold)
+    , kConfidenceThreshold(confidence_threshold)
+{
+    // Setting this variable to true ensures that the dump_miss() function below
+    // gets called during cache simulation on a cache miss.
+    dump_misses = true;
+}
+
+// Resets the base-class statistics and discards every recorded miss.
+void
+cache_miss_stats_t::reset()
+{
+    cache_stats_t::reset();
+    pc_cache_misses.clear();
+    total_misses = 0;
+}
+
+// Records one miss. Only memory reads (loads) are recorded: the miss is
+// appended to the per-PC list in pc_cache_misses and counted in total_misses.
+void
+cache_miss_stats_t::dump_miss(const memref_t &memref)
+{
+    if (memref.data.type != TRACE_TYPE_READ) {
+        return;
+    }
+
+    // Misses are stored as cache line numbers (byte address / line size), so
+    // the strides computed later are expressed in cache lines.
+    const addr_t pc = memref.data.pc;
+    const addr_t line_addr = memref.data.addr / kLineSize;
+    pc_cache_misses[pc].push_back(line_addr);
+    total_misses++;
+}
+
+// Analyzes every load whose number of recorded misses reaches the effective
+// threshold and returns one recommendation per load that shows a dominant
+// nonzero stride.
+// The returned objects are allocated with new and are not freed by this class:
+// the caller is responsible for deleting them.
+std::vector<prefetching_recommendation_t *>
+cache_miss_stats_t::generate_recommendations()
+{
+    // The effective threshold is the smaller of the absolute threshold and the
+    // fractional threshold applied to all recorded misses. The comparison is
+    // done on doubles so that an out-of-range product is never converted to
+    // unsigned int.
+    const double frac_count = kMissFracThreshold * static_cast<double>(total_misses);
+    unsigned int miss_count_threshold = kMissCountThreshold;
+    if (frac_count < static_cast<double>(kMissCountThreshold)) {
+        miss_count_threshold =
+            frac_count > 0.0 ? static_cast<unsigned int>(frac_count) : 0;
+    }
+
+    // Find loads that should be analyzed and analyze them.
+    std::vector<prefetching_recommendation_t *> recommendations;
+    for (const auto &pc_cache_misses_it : pc_cache_misses) {
+        const std::vector<addr_t> &cache_misses = pc_cache_misses_it.second;
+        if (cache_misses.size() < miss_count_threshold) {
+            continue;
+        }
+
+        const int stride = check_for_constant_stride(cache_misses);
+        if (stride == 0) {
+            continue;
+        }
+
+        prefetching_recommendation_t *recommendation = new prefetching_recommendation_t;
+        recommendation->pc = pc_cache_misses_it.first;
+        recommendation->stride = stride;
+        recommendation->locality = kNTA;
+        recommendations.push_back(recommendation);
+    }
+
+    return recommendations;
+}
+
+// Returns the most frequent nonzero difference between consecutive entries of
+// cache_misses if its occurrence count reaches
+// kConfidenceThreshold * cache_misses.size(); returns 0 otherwise (including
+// for inputs with fewer than two entries).
+int
+cache_miss_stats_t::check_for_constant_stride(
+    const std::vector<addr_t> &cache_misses) const
+{
+    std::unordered_map<int, int> stride_counts;
+
+    // Find and count all strides in the misses stream. size_t is used for the
+    // index so that it cannot wrap before reaching cache_misses.size().
+    for (size_t i = 1; i < cache_misses.size(); ++i) {
+        const int stride = static_cast<int>(cache_misses[i] - cache_misses[i - 1]);
+        if (stride != 0) {
+            ++stride_counts[stride];
+        }
+    }
+
+    // Find the most occurring stride.
+    int max_count = 0;
+    int max_count_stride = 0;
+    for (const auto &stride_count : stride_counts) {
+        if (stride_count.second > max_count) {
+            max_count = stride_count.second;
+            max_count_stride = stride_count.first;
+        }
+    }
+
+    // Return the most occurring stride if it meets the confidence threshold.
+    const int required_count =
+        static_cast<int>(kConfidenceThreshold * cache_misses.size());
+    return max_count >= required_count ? max_count_stride : 0;
+}
+
+// Builds the underlying cache simulator and, when that succeeded, replaces the
+// stats object of the "LL" cache with a cache_miss_stats_t that records misses.
+cache_miss_analyzer_t::cache_miss_analyzer_t(const cache_simulator_knobs_t &knobs,
+                                             unsigned int miss_count_threshold,
+                                             double miss_frac_threshold,
+                                             double confidence_threshold)
+    : cache_simulator_t(knobs)
+    , ll_stats(nullptr)
+{
+    // ll_stats stays null when the base simulator failed to initialize, so
+    // generate_recommendations() can detect that case instead of dereferencing
+    // an uninitialized pointer.
+    if (!success) {
+        return;
+    }
+    const bool warmup_enabled =
+        (knobs.warmup_refs > 0 || knobs.warmup_fraction > 0.0);
+
+    delete llcaches["LL"]->get_stats();
+    ll_stats =
+        new cache_miss_stats_t(warmup_enabled, knobs.line_size, miss_count_threshold,
+                               miss_frac_threshold, confidence_threshold);
+    llcaches["LL"]->set_stats(ll_stats);
+}
+
+// Forwards to the "LL" cache's miss statistics. Returns an empty vector when
+// the simulator failed to initialize and no statistics object exists.
+std::vector<prefetching_recommendation_t *>
+cache_miss_analyzer_t::generate_recommendations()
+{
+    if (ll_stats == nullptr) {
+        return std::vector<prefetching_recommendation_t *>();
+    }
+    return ll_stats->generate_recommendations();
+}
diff --git a/clients/drcachesim/simulator/cache_miss_analyzer.h b/clients/drcachesim/simulator/cache_miss_analyzer.h
new file mode 100644
index 00000000000..7ca1cde242b
--- /dev/null
+++ b/clients/drcachesim/simulator/cache_miss_analyzer.h
@@ -0,0 +1,154 @@
+/* **********************************************************
+ * Copyright (c) 2015-2018 Google, LLC All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, LLC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/* cache_miss_analyzer: finds the load instructions suffering from
+ * a significant number of last-level cache (LLC) misses. In addition,
+ * it analyzes the data memory addresses accessed by these load instructions
+ * and identifies patterns that can be used in SW prefetching.
+ */
+
+#ifndef _CACHE_MISS_ANALYZER_H_
+#define _CACHE_MISS_ANALYZER_H_ 1
+
+#include <stdint.h>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "cache_simulator.h"
+#include "cache_stats.h"
+#include "../common/memref.h"
+
+// Represents the SW prefetching recommendation passed to the compiler.
+struct prefetching_recommendation_t {
+    addr_t pc;            // Load instruction's address.
+    int stride;           // Prefetching stride/delta distance, in cache lines.
+    std::string locality; // Prefetching locality: one of "nta" or "t0".
+};
+
+// Cache statistics that additionally record, per load instruction, the cache
+// lines that missed, and that can turn those records into prefetching
+// recommendations.
+class cache_miss_stats_t : public cache_stats_t {
+public:
+    // Constructor - params description:
+    // - warmup_enabled: Indicates whether the caches need to be warmed up
+    //                   before stats and misses start being collected.
+    // - line_size: The cache line size in bytes.
+    // - miss_count_threshold: Threshold of misses count by a load instruction
+    //                         to be eligible for analysis.
+    // - miss_frac_threshold: Threshold of misses fraction by a load
+    //                        instruction to be eligible for analysis.
+    // - confidence_threshold: Confidence threshold to include a discovered
+    //                         pattern in the output results.
+    // Confidence in a discovered pattern for a load instruction is calculated
+    // as the fraction of the load's misses with the discovered pattern over
+    // all the load's misses.
+    cache_miss_stats_t(bool warmup_enabled = false, unsigned int line_size = 64,
+                       unsigned int miss_count_threshold = 50000,
+                       double miss_frac_threshold = 0.005,
+                       double confidence_threshold = 0.75);
+
+    // Assignment is a no-op: it copies nothing and returns *this. The
+    // thresholds and line size are const members and cannot be reassigned.
+    // NOTE(review): presumably defined only so that the class has a usable
+    // assignment operator despite its const members -- confirm the intent.
+    cache_miss_stats_t &
+    operator=(const cache_miss_stats_t &)
+    {
+        return *this;
+    }
+
+    // Resets the base-class statistics and discards all recorded misses.
+    virtual void
+    reset();
+
+    // Returns one recommendation per analyzed load that shows a dominant
+    // stride. The returned objects are heap-allocated and this class does not
+    // free them: the caller must delete each element.
+    std::vector<prefetching_recommendation_t *>
+    generate_recommendations();
+
+protected:
+    // Records a miss caused by a memory read; other reference types are ignored.
+    virtual void
+    dump_miss(const memref_t &memref);
+
+private:
+    // Two locality levels for prefetching are supported: nta and t0.
+    static const char *kNTA;
+    static const char *kT0;
+
+    // Cache line size.
+    const unsigned int kLineSize;
+
+    // A load instruction should be analyzed if its total number/fraction of LLC
+    // misses is equal to or larger than one of the two threshold values below:
+    const unsigned int kMissCountThreshold; // Absolute count.
+    const double kMissFracThreshold;        // Fraction of all LLC misses.
+
+    // Confidence threshold for recording a cache misses stride.
+    // Confidence in a discovered pattern for a load instruction is calculated
+    // as the fraction of the load's misses with the discovered pattern over
+    // all the load's misses.
+    const double kConfidenceThreshold;
+
+    // A function to analyze cache misses in search of a constant stride.
+    // The function returns a nonzero stride value if it finds one that
+    // satisfies the confidence threshold and returns 0 otherwise.
+    int
+    check_for_constant_stride(const std::vector<addr_t> &cache_misses) const;
+
+    // A hash map storing the data cache line addresses accessed by load
+    // instructions that miss in the LLC.
+    // Key is the PC of the load instruction.
+    // Value is a vector of data memory cache line addresses.
+    std::unordered_map<addr_t, std::vector<addr_t>> pc_cache_misses;
+
+    // Total number of LLC misses added to the hash map above.
+    // Unsigned and 64-bit wide: it is incremented once per recorded miss and a
+    // signed int counter would have undefined behavior on overflow.
+    uint64_t total_misses = 0;
+};
+
+// A cache simulator whose "LL" cache records load misses so that prefetching
+// recommendations can be generated after the trace has been processed.
+class cache_miss_analyzer_t : public cache_simulator_t {
+public:
+    // Constructor:
+    // - cache_simulator_knobs_t: Encapsulates the cache simulator params.
+    // - miss_count_threshold: Threshold of miss count by a load instruction
+    //                         to be eligible for analysis.
+    // - miss_frac_threshold: Threshold of miss fraction by a load
+    //                        instruction to be eligible for analysis.
+    // - confidence_threshold: Confidence threshold to include a discovered
+    //                         pattern in the output results.
+    // Confidence in a discovered pattern for a load instruction is calculated
+    // as the fraction of the load's misses with the discovered pattern over
+    // all the load's misses.
+    cache_miss_analyzer_t(const cache_simulator_knobs_t &knobs,
+                          unsigned int miss_count_threshold = 50000,
+                          double miss_frac_threshold = 0.005,
+                          double confidence_threshold = 0.75);
+
+    // Returns the recommendations computed from the recorded LLC misses.
+    // The caller must delete each returned element.
+    std::vector<prefetching_recommendation_t *>
+    generate_recommendations();
+
+private:
+    // Miss statistics installed on the "LL" cache by the constructor.
+    // Defaults to null so that the pointer never holds an indeterminate value
+    // when the constructor returns early.
+    cache_miss_stats_t *ll_stats = nullptr;
+};
+
+#endif /* _CACHE_MISS_ANALYZER_H_ */
diff --git a/clients/drcachesim/simulator/cache_simulator_create.h b/clients/drcachesim/simulator/cache_simulator_create.h
index 9300f867a68..a292d645f7e 100644
--- a/clients/drcachesim/simulator/cache_simulator_create.h
+++ b/clients/drcachesim/simulator/cache_simulator_create.h
@@ -99,4 +99,10 @@ cache_simulator_create(const cache_simulator_knobs_t &knobs);
 analysis_tool_t *
 cache_simulator_create(const std::string &config_file);
 
+/** Creates an instance of a cache miss analyzer. */
+analysis_tool_t *
+cache_miss_analyzer_create(const cache_simulator_knobs_t &knobs,
+                           uint64_t miss_count_threshold, double miss_frac_threshold,
+                           double confidence_threshold);
+
 #endif /* _CACHE_SIMULATOR_CREATE_H_ */
diff --git a/clients/drcachesim/tests/cache_miss_analyzer_test.cpp b/clients/drcachesim/tests/cache_miss_analyzer_test.cpp
new file mode 100644
index 00000000000..bd01ddbfea0
--- /dev/null
+++ b/clients/drcachesim/tests/cache_miss_analyzer_test.cpp
@@ -0,0 +1,206 @@
+/* **********************************************************
+ * Copyright (c) 2015-2018 Google, LLC All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, LLC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <iostream>
+#include <vector>
+
+#include "../simulator/cache_miss_analyzer.h"
+#include "../simulator/cache_simulator.h"
+#include "../common/memref.h"
+
+// Builds an 8-byte memory read of 'addr' issued by the instruction at 'pc'.
+static memref_t
+generate_mem_ref(const addr_t addr, const addr_t pc)
+{
+    memref_t memref;
+    memref.data.type = TRACE_TYPE_READ;
+    memref.data.pid = 11111;
+    memref.data.tid = 22222;
+    memref.data.addr = addr;
+    memref.data.size = 8;
+    memref.data.pc = pc;
+    return memref;
+}
+
+// Builds the simulator knobs shared by all tests: the given line size, a 1MB
+// last-level cache and no data prefetcher.
+static cache_simulator_knobs_t
+make_knobs(unsigned int line_size)
+{
+    cache_simulator_knobs_t knobs;
+    knobs.line_size = line_size;
+    knobs.LL_size = 1024 * 1024;
+    knobs.data_prefetcher = "none";
+    return knobs;
+}
+
+// Deletes the recommendations returned by the analyzer, which allocates them
+// with new and hands them out as raw pointers, and empties the vector.
+static void
+free_recommendations(std::vector<prefetching_recommendation_t *> &recommendations)
+{
+    for (prefetching_recommendation_t *recommendation : recommendations) {
+        delete recommendation;
+    }
+    recommendations.clear();
+}
+
+// A test with no dominant stride. Returns true if the test passed.
+bool
+no_dominant_stride()
+{
+    const unsigned int kLineSize = 64;
+
+    // Create the cache miss analyzer object.
+    cache_miss_analyzer_t analyzer(make_knobs(kLineSize), 1000, 0.01, 0.75);
+
+    // Analyze a stream of memory load references with no dominant stride.
+    addr_t addr = 0x1000;
+    for (int i = 0; i < 100000; ++i) {
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += kLineSize;
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * 3);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * 5);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * 7);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * 5);
+    }
+
+    // Generate the analyzer's result and check it.
+    std::vector<prefetching_recommendation_t *> recommendations =
+        analyzer.generate_recommendations();
+    const bool passed = recommendations.empty();
+    if (passed) {
+        std::cout << "no_dominant_stride test passed." << std::endl;
+    } else {
+        std::cerr << "no_dominant_stride test failed." << std::endl;
+    }
+    free_recommendations(recommendations);
+    return passed;
+}
+
+// A test with one dominant stride. Returns true if the test passed.
+bool
+one_dominant_stride()
+{
+    const int kStride = 7;
+    const unsigned int kLineSize = 64;
+
+    // Create the cache miss analyzer object.
+    cache_miss_analyzer_t analyzer(make_knobs(kLineSize), 1000, 0.01, 0.75);
+
+    // Analyze a stream of memory load references with one dominant stride.
+    addr_t addr = 0x1000;
+    for (int i = 0; i < 100000; ++i) {
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * kStride);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * kStride);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * kStride);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        addr += (kLineSize * kStride);
+        analyzer.process_memref(generate_mem_ref(addr, 0xAAAA));
+        // One step in five deliberately differs from the dominant stride.
+        addr += 1000;
+    }
+
+    // Generate the analyzer's result and check it.
+    std::vector<prefetching_recommendation_t *> recommendations =
+        analyzer.generate_recommendations();
+    bool passed = false;
+    if (recommendations.size() != 1) {
+        std::cerr << "one_dominant_stride test failed: number of recommendations "
+                  << "should be exactly 1, but was " << recommendations.size()
+                  << std::endl;
+    } else if (recommendations[0]->pc == 0xAAAA &&
+               recommendations[0]->stride == kStride) {
+        std::cout << "one_dominant_stride test passed." << std::endl;
+        passed = true;
+    } else {
+        std::cerr << "one_dominant_stride test failed: wrong recommendation: "
+                  << "pc=" << recommendations[0]->pc
+                  << ", stride=" << recommendations[0]->stride << std::endl;
+    }
+    free_recommendations(recommendations);
+    return passed;
+}
+
+// A test with two dominant strides. Returns true if the test passed.
+bool
+two_dominant_strides()
+{
+    const int kStride1 = 3;
+    const int kStride2 = 11;
+    const unsigned int kLineSize = 64;
+
+    // Create the cache miss analyzer object.
+    cache_miss_analyzer_t analyzer(make_knobs(kLineSize), 1000, 0.01, 0.75);
+
+    // Analyze a stream of memory load references with two dominant strides.
+    addr_t addr1 = 0x1000;
+    addr_t addr2 = 0x2000;
+    for (int i = 0; i < 100000; ++i) {
+        analyzer.process_memref(generate_mem_ref(addr1, 0xAAAA));
+        addr1 += (kLineSize * kStride1);
+        analyzer.process_memref(generate_mem_ref(addr1, 0xAAAA));
+        addr1 += (kLineSize * kStride1);
+        analyzer.process_memref(generate_mem_ref(addr2, 0xBBBB));
+        addr2 += (kLineSize * kStride2);
+        analyzer.process_memref(generate_mem_ref(addr1, 0xAAAA));
+        addr1 += (kLineSize * kStride1);
+        analyzer.process_memref(generate_mem_ref(addr2, 0xBBBB));
+        addr2 += (kLineSize * kStride2);
+        analyzer.process_memref(generate_mem_ref(addr2, 0xBBBB));
+        addr2 += (kLineSize * kStride2);
+    }
+
+    // Generate the analyzer's result and check it. The two recommendations may
+    // be returned in either order.
+    std::vector<prefetching_recommendation_t *> recommendations =
+        analyzer.generate_recommendations();
+    bool passed = false;
+    if (recommendations.size() != 2) {
+        std::cerr << "two_dominant_strides test failed: number of recommendations "
+                  << "should be exactly 2, but was " << recommendations.size()
+                  << std::endl;
+    } else if ((recommendations[0]->pc == 0xAAAA &&
+                recommendations[0]->stride == kStride1 &&
+                recommendations[1]->pc == 0xBBBB &&
+                recommendations[1]->stride == kStride2) ||
+               (recommendations[1]->pc == 0xAAAA &&
+                recommendations[1]->stride == kStride1 &&
+                recommendations[0]->pc == 0xBBBB &&
+                recommendations[0]->stride == kStride2)) {
+        std::cout << "two_dominant_strides test passed." << std::endl;
+        passed = true;
+    } else {
+        std::cerr << "two_dominant_strides test failed: wrong recommendations."
+                  << std::endl;
+    }
+    free_recommendations(recommendations);
+    return passed;
+}
+
+// Runs every test (even after a failure) and reports failure through the exit
+// status, so that a test harness relying on the exit code notices it.
+int
+main(int argc, const char *argv[])
+{
+    bool all_passed = no_dominant_stride();
+    all_passed = one_dominant_stride() && all_passed;
+    all_passed = two_dominant_strides() && all_passed;
+
+    return all_passed ? 0 : 1;
+}