From b1c7f88394b97f9a70ea840269ae4dae3192ac76 Mon Sep 17 00:00:00 2001
From: Eladash <18193363+elad335@users.noreply.github.com>
Date: Sun, 10 Mar 2024 21:03:39 +0200
Subject: [PATCH] PPU LLVM: Recycle identical functions

---
 rpcs3/Emu/Cell/PPUAnalyser.cpp   | 134 +++++++++++++++++++++++++++++++
 rpcs3/Emu/Cell/PPUAnalyser.h     |   2 +
 rpcs3/Emu/Cell/PPUThread.cpp     |  52 ++++++++++--
 rpcs3/Emu/Cell/PPUTranslator.cpp |  33 +++++++-
 rpcs3/Emu/Cell/PPUTranslator.h   |   3 +
 5 files changed, 214 insertions(+), 10 deletions(-)

diff --git a/rpcs3/Emu/Cell/PPUAnalyser.cpp b/rpcs3/Emu/Cell/PPUAnalyser.cpp
index 9e6ae1190674..6f989ef9bceb 100644
--- a/rpcs3/Emu/Cell/PPUAnalyser.cpp
+++ b/rpcs3/Emu/Cell/PPUAnalyser.cpp
@@ -2046,6 +2046,140 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
 	}
 
 	ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
+
+	std::unordered_map> duplicate_data_map; // Keyed by a function's raw bytes; .first = how many functions share them, .second = chosen source address (0 = group rejected). NOTE(review): template arguments are missing in this copy of the patch
+	duplicate_map.clear(); // Start from an empty address -> source-address mapping
+
+	for (auto& func : funcs) // Pass 1: group functions with identical contents and pick one source address per group
+	{
+		if (func.size == 0 || func.size > 10000u) // Empty functions and functions larger than 10000 are never considered
+		{
+			continue;
+		}
+
+		auto& data = duplicate_data_map[std::string_view{get_ptr(func.addr), func.size}]; // operator[] inserts a default entry the first time these bytes are seen
+
+		const usz count = data.first;
+
+		if (!count) // First function with these bytes: record it as the provisional source
+		{
+			data.first++;
+			data.second = func.addr;
+			continue;
+		}
+
+		if (!data.second) // Source address 0 marks a group that failed the validation below
+		{
+			continue;
+		}
+
+		if (count == 1) // Second occurrence: run the branch validation once for this group
+		{
+			const u32 faddr = func.addr;
+			const u32 fend = func.addr + func.size;
+
+			bool fail = false;
+
+			//for (const auto [addr, size] : func.blocks)
+			const u32 addr = func.addr; // The per-block loop above is disabled; the whole function is scanned as one range
+			const u32 size = func.size;
+			{
+				if (size == 0) // NOTE(review): cannot be true here (func.size == 0 was filtered at the top of the loop)
+				{
+					continue; // Inside a bare block, so this continues the outer per-function loop
+				}
+
+				auto i_ptr = ensure(get_ptr(addr));
+
+				for (u32 i = addr; i < addr + size; i += 4, i_ptr++) // Walk the range one 4-byte instruction at a time
+				{
+					const ppu_opcode_t op{*i_ptr};
+					const auto itype = s_ppu_itype.decode(op.opcode);
+
+					if (itype != ppu_itype::BC && itype != ppu_itype::B) // Anything other than B/BC is only inspected when it is the final instruction
+					{
+						if (i == fend - 4)
+						{
+							if (!(itype & ppu_itype::branch) && itype != ppu_itype::SC) // Final instruction is neither a branch type nor SC
+							{
+								// Inserts a branch to following code
+								fail = true;
+								break;
+							}
+						}
+
+						continue;
+					}
+
+					const u32 target = (op.aa ? 0 : i) + (itype == ppu_itype::B ? +op.bt24 : +op.bt14); // Absolute when op.aa is set, otherwise relative to the instruction address
+
+					if (target >= fend || target < faddr) // Reject any B/BC whose destination lies outside [faddr, fend)
+					{
+						fail = true;
+						break;
+					}
+
+					if (itype == ppu_itype::BC && (op.bo & 0x14) != 0x14) // presumably "BC that is not branch-always" -- confirm against the PowerPC BO field encoding
+					{
+						if (i == fend - 4)
+						{
+							// Can branch to next
+							fail = true;
+							break;
+						}
+					}
+				}
+			}
+
+			if (fail) // Reject the group: count goes back to 1 and source address 0 makes later members skip early
+			{
+				data.first = 1;
+				data.second = 0;
+				continue;
+			}
+		}
+
+		data.first++; // One more function shares these bytes
+
+		// Choose the lowest function as the source
+		data.second = std::min(data.second, func.addr);
+	}
+
+	usz dups_count = 0; // Counts functions that were mapped to a different (source) address
+
+	for (auto& func : funcs) // Pass 2: publish the mapping for every group that has more than one member
+	{
+		if (func.size == 0 || func.size > 10000u) // Same filter as pass 1, so every remaining function has an entry for at32()
+		{
+			continue;
+		}
+
+		const auto data = ::at32(duplicate_data_map, std::string_view{get_ptr(func.addr), func.size}); // Same key as in pass 1
+
+		if (data.first > 1)
+		{
+			duplicate_map[func.addr] = data.second; // Every member gets an entry; the source function maps to itself
+
+			for (const auto [addr, size] : func.blocks) // Map each block to the same offset inside the source function
+			{
+				if (size == 0 || addr >= func.addr + func.size) // NOTE(review): blocks starting below func.addr are not filtered -- confirm that cannot happen
+				{
+					continue;
+				}
+
+				duplicate_map[addr] = data.second + (addr - func.addr);
+			}
+
+			if (func.addr != data.second) // The source itself is not counted as a duplicate
+			{
+				dups_count++;
+			}
+
+			ppu_log.trace("Found PPU function duplicate: func 0x%x vs 0x%x (%d times) (size=%d)", func.addr, data.second, data.first, func.size);
+		}
+	}
+
+	ppu_log.success("Function duplication count: %d/%d (%g%)", dups_count, duplicate_data_map.size(), dups_count * 100.0 / duplicate_data_map.size()); // NOTE(review): the divisor is 0 when no function passed the size filter, so the last argument is then not a meaningful percentage
 
 	return true;
 }
diff --git a/rpcs3/Emu/Cell/PPUAnalyser.h b/rpcs3/Emu/Cell/PPUAnalyser.h
index c1515c65c1ff..114a7b00c23e 100644
--- a/rpcs3/Emu/Cell/PPUAnalyser.h
+++ b/rpcs3/Emu/Cell/PPUAnalyser.h
@@ -95,6 +95,7 @@ struct ppu_module
 	std::vector funcs{};
 	std::deque> allocations;
 	std::map addr_to_seg_index;
+	std::unordered_map duplicate_map; // Function/block address -> matching address in the identical function chosen as source; rebuilt by analyse()
 
 	// Copy info without functions
 	void copy_part(const ppu_module& info)
@@ -107,6 +108,7 @@ struct ppu_module
 		secs = info.secs;
 		allocations = info.allocations;
 		addr_to_seg_index = info.addr_to_seg_index;
+		duplicate_map = info.duplicate_map; // The duplicate mapping is copied along with the other per-module info
 	}
 
 	bool analyse(u32 lib_toc, u32 entry, u32 end, const std::basic_string& applied, std::function check_aborted = {});
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 49b8aa747eab..13d743ada108 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -4146,7 +4146,7 @@ extern void ppu_precompile(std::vector& dir_queue, std::vectorsecond;
+			} // NOTE(review): the beginning of this hunk is missing from this copy (hunk header and added lines are fused together)
+
 			// Fixup some information
-			entry.name = fmt::format("__0x%x", entry.addr - reloc);
+			entry.name = fmt::format("__0x%x", og_func - reloc); // Named after og_func instead of entry.addr; og_func is assigned in the part of the hunk not visible here
 
 			if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
 			{
@@ -4808,7 +4815,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
 			settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");
 
 		// Write version, hash, CPU, settings
-		fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
+		fmt::append(obj_name, "v7-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu)); // Version tag bumped v6 -> v7; presumably so objects built before this change are not reused -- confirm
 	}
 
 	if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
@@ -5037,6 +5044,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
 
 	bool early_exit = false;
 
+	std::map func_ptr_map; // Symbol name -> resolved pointer, so a name shared by several functions is fetched from the JIT only once
+
 	// Get and install function addresses
 	for (const auto& func : info.funcs)
 	{
@@ -5054,12 +5063,29 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
 				break;
 			}
 
-			const auto name = fmt::format("__0x%x", func.addr - reloc);
+			u32 og_func = func.addr; // Address used to build the symbol name
+
+			if (auto it = info.duplicate_map.find(func.addr); it != info.duplicate_map.end()) // Functions listed in duplicate_map use their mapped (source) address instead
+			{
+				og_func = it->second;
+			}
+
+			const auto name = fmt::format("__0x%x", og_func - reloc);
+
+			ppu_intrp_func_t dummy{}; // Stand-in target for the reference below when the name cache is not used
+			ppu_intrp_func_t& func_ptr = is_first ? func_ptr_map[name] : dummy; // Cache slot for this name; only consulted when is_first
 
 			// Try to locate existing function if it is not the first time
-			const auto addr = is_first ? ensure(reinterpret_cast(jit->get(name)))
-				: reinterpret_cast(ensure(jit_mod.funcs[index]));
+			const auto addr = is_first ? (func_ptr ? func_ptr : (reinterpret_cast(jit->get(name)))) // First time: cached pointer if one exists, otherwise look the symbol up in the JIT
+				: reinterpret_cast(jit_mod.funcs[index]); // Otherwise: take the pointer already stored in jit_mod
+
+			if (!addr) // The ensure() calls were moved out of the expression above so the failing address is logged first
+			{
+				ppu_log.fatal("Failed to retrieve symbol address at 0x%x (duplicate=0x%x)", func.addr, info.duplicate_map.contains(func.addr) ? og_func : 0);
+				ensure(addr);
+			}
 
+			func_ptr = addr; // Stores into func_ptr_map[name] when is_first, into dummy otherwise
 			jit_mod.funcs.emplace_back(addr);
 
 			if (func.size == 4 && !BLR_func && *info.get_ptr(func.addr) == ppu_instructions::BLR())
@@ -5148,6 +5174,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
 	{
 		if (func.size)
 		{
+			if (auto it = module_part.duplicate_map.find(func.addr); it != module_part.duplicate_map.end() && it->second != it->first) // Mapped to a different address: skip the getOrInsertFunction() setup below for this function
+			{
+				continue;
+			}
+
 			const auto f = cast(_module->getOrInsertFunction(func.name, _func).getCallee());
 			f->setCallingConv(CallingConv::GHC);
 			f->addParamAttr(1, llvm::Attribute::NoAlias);
@@ -5194,6 +5225,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
 
 			if (module_part.funcs[fi].size)
 			{
+				const u32 faddr = module_part.funcs[fi].addr;
+				auto it = module_part.duplicate_map.find(faddr);
+
+				if (it != module_part.duplicate_map.end() && it->second != faddr) // Mapped to a different source function: do not translate this one
+				{
+					ppu_log.trace("LLVM: Function 0x%x was skipped (duplicate)", faddr);
+					continue;
+				}
+
 				// Translate
 				if (const auto func = translator.Translate(module_part.funcs[fi]))
 				{
diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
index 6441349a1912..74381914d591 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.cpp
+++ b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -148,6 +148,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
 	const u64 base = m_reloc ? m_reloc->addr : 0;
 	m_addr = info.addr - base;
 	m_attr = info.attr;
+	m_func_base = m_addr; // Remember where this function starts; like m_addr, this value has base subtracted
 
 	// Don't emit check in small blocks without terminator
 	bool need_check = info.size >= 16;
@@ -304,13 +305,29 @@ Value* PPUTranslator::VecHandleResult(Value* val)
 
 Value* PPUTranslator::GetAddr(u64 _add)
 {
-	if (m_reloc)
+	const auto old_cia = std::exchange(m_cia, nullptr); // m_cia is cleared around the RegLoad below and restored afterwards; presumably to force a fresh load -- confirm
+
+	const bool is_duplicate = m_info.duplicate_map.contains(m_func_base); // NOTE(review): m_func_base has base subtracted, whereas CallFunction looks this map up with an address that still includes base -- confirm both agree when m_reloc is set
+	const auto cia_add = is_duplicate ? ZExt(RegLoad(m_cia)) : nullptr; // The CIA register value is only loaded for functions present in duplicate_map
+	const u32 inst_diff = is_duplicate ? m_addr - m_func_base : m_addr; // Offset from the function start for duplicate_map functions, else m_addr itself. NOTE(review): m_addr is u64 but is stored in a u32 here
+
+	// Restore the m_cia value saved above
+	m_cia = old_cia;
+
+	Value* addr = nullptr; // NOTE(review): never read or written again in this function
+
+	if (is_duplicate) // Address is computed at run time as CIA + (offset within function + _add)
+	{
+		// Add to current CIA
+		return m_ir->CreateAdd(m_ir->getInt64(inst_diff + _add), cia_add);
+	}
+	else if (m_reloc)
 	{
 		// Load segment address from global variable, compute actual instruction address
-		return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_seg0);
+		return m_ir->CreateAdd(m_ir->getInt64(inst_diff + _add), m_seg0); // inst_diff equals m_addr on this path, so the value matches the removed line
 	}
 
-	return m_ir->getInt64(m_addr + _add);
+	return m_ir->getInt64(inst_diff + _add); // inst_diff equals m_addr on this path as well
 }
 
 Type* PPUTranslator::ScaleType(Type* type, s32 pow2)
@@ -419,7 +436,15 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
 
 		if (!indirect)
 		{
-			callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
+			const auto it = m_info.duplicate_map.find(target_last); // Direct calls are redirected through duplicate_map when the target has an entry
+			const u32 first_func = it == m_info.duplicate_map.end() ? target_last : it->second; // Mapped source address, or the target itself when it has no entry
+
+			if (base) // With a non-zero base, both addresses must be at or above it before base is subtracted below
+			{
+				ensure(first_func >= base && target_last >= base);
+			}
+
+			callee = m_module->getOrInsertFunction(fmt::format("__0x%x", first_func - base), type); // Same "__0x%x" naming as before, but built from first_func
 			cast(callee.getCallee())->setCallingConv(CallingConv::GHC);
 		}
 	}
diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h
index d72dc1de20c7..805aaa1df229 100644
--- a/rpcs3/Emu/Cell/PPUTranslator.h
+++ b/rpcs3/Emu/Cell/PPUTranslator.h
@@ -28,6 +28,9 @@ class PPUTranslator final : public cpu_translator
 	// Current position-independent address
 	u64 m_addr = 0;
 
+	// Start of the function being translated (set from m_addr in Translate())
+	u64 m_func_base = 0;
+
 	// Function attributes
 	bs_t m_attr{};