Skip to content

Commit

Permalink
SPU LLVM: Small FCGT optimization
Browse files: browse the repository at this point in the history
  • Loading branch information
Whatcookie authored and elad335 committed Feb 22, 2025
1 parent 86a832d commit bd49c6b
Showing 1 changed file with 13 additions and 10 deletions.
23 changes: 13 additions & 10 deletions rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -6234,14 +6234,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
const value_t<f32[4]> ab[2]{a, b};

std::bitset<2> safe_int_compare(0);
std::bitset<2> safe_nonzero_compare(0);
std::bitset<2> safe_finite_compare(0);

for (u32 i = 0; i < 2; i++)
{
if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok)
{
safe_int_compare.set(i);
safe_nonzero_compare.set(i);
safe_finite_compare.set(i);

for (u32 j = 0; j < 4; j++)
{
Expand All @@ -6256,7 +6256,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// we don't produce "extended range" values the same way as real hardware, it's not safe to apply
// this optimization for values outside of the range of x86 floating point hardware.
safe_int_compare.reset(i);
if (!exponent) safe_nonzero_compare.reset(i);
if ((value & 0x7fffffffu) >= 0x7f7ffffeu) safe_finite_compare.reset(i);
}
}
}
Expand All @@ -6267,17 +6267,20 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}

const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));

if (!safe_nonzero_compare.any())
if (safe_finite_compare.test(1))
{
return eval(sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
return eval(sext<s32[4]>(fcmp_uno(clamp_negative_smax(a) > b)));
}
else

if (safe_finite_compare.test(0))
{
return eval(sext<s32[4]>(select((ai & bi) >= 0, ai > bi, ai < bi)));
return eval(sext<s32[4]>(fcmp_ord(a > clamp_smax(b))));
}

const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));

return eval(sext<s32[4]>(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi)));
});

set_vr(op.rt, fcgt(get_vr<f32[4]>(op.ra), get_vr<f32[4]>(op.rb)));
Expand Down

0 comments on commit bd49c6b

Please sign in to comment.