From 9c8b0678a5c15b745251accf63ebaa0f474d2482 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 13 Jun 2024 14:07:01 -0700 Subject: [PATCH] [a64] Optimize `OPCODE_SPLAT` with `MOVI`/`FMOV` Moves the `FMOV` constant functions into `a64_util` so it is available to other translation units. Optimize constant-splats with conditional use of `MOVI` and `FMOV`. --- src/xenia/cpu/backend/a64/a64_emitter.cc | 69 +------------------- src/xenia/cpu/backend/a64/a64_seq_vector.cc | 48 +++++++++++--- src/xenia/cpu/backend/a64/a64_util.h | 72 ++++++++++++++++++++- 3 files changed, 110 insertions(+), 79 deletions(-) diff --git a/src/xenia/cpu/backend/a64/a64_emitter.cc b/src/xenia/cpu/backend/a64/a64_emitter.cc index 925e8bb9f5..e835f4affc 100644 --- a/src/xenia/cpu/backend/a64/a64_emitter.cc +++ b/src/xenia/cpu/backend/a64/a64_emitter.cc @@ -8,6 +8,7 @@ */ #include "xenia/cpu/backend/a64/a64_emitter.h" +#include "xenia/cpu/backend/a64/a64_util.h" #include @@ -810,74 +811,6 @@ uintptr_t A64Emitter::GetVConstPtr(VConst id) const { return GetVConstPtr() + GetVConstOffset(id); } -// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV -// returns false if the value cannot be represented -// C2.2.3 Modified immediate constants in A64 floating-point instructions -// abcdefgh -// V -// aBbbbbbc defgh000 00000000 00000000 -// B = NOT(b) -static bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) { - const uint32_t sign = (u32 >> 31) & 1; - int32_t exp = ((u32 >> 23) & 0xff) - 127; - int64_t mantissa = u32 & 0x7fffff; - - // Too many mantissa bits - if (mantissa & 0x7ffff) { - return false; - } - // Too many exp bits - if (exp < -3 || exp > 4) { - return false; - } - - // mantissa = (16 + e:f:g:h) / 16. 
- mantissa >>= 19; - if ((mantissa & 0b1111) != mantissa) { - return false; - } - - // exp = (NOT(b):c:d) - 3 - exp = ((exp + 3) & 0b111) ^ 0b100; - - fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa)); - return true; -} - -// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV -// returns false if the value cannot be represented -// C2.2.3 Modified immediate constants in A64 floating-point instructions -// abcdefgh -// V -// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000 -// B = NOT(b) -static bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) { - const uint32_t sign = (u64 >> 63) & 1; - int32_t exp = ((u64 >> 52) & 0x7ff) - 1023; - int64_t mantissa = u64 & 0xfffffffffffffULL; - - // Too many mantissa bits - if (mantissa & 0xffffffffffffULL) { - return false; - } - // Too many exp bits - if (exp < -3 || exp > 4) { - return false; - } - - // mantissa = (16 + e:f:g:h) / 16. - mantissa >>= 48; - if ((mantissa & 0b1111) != mantissa) { - return false; - } - - // exp = (NOT(b):c:d) - 3 - exp = ((exp + 3) & 0b111) ^ 0b100; - - fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa)); - return true; -} - // Implies possible StashV(0, ...)! 
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) { if (!v.low && !v.high) { diff --git a/src/xenia/cpu/backend/a64/a64_seq_vector.cc b/src/xenia/cpu/backend/a64/a64_seq_vector.cc index 6828437235..abc4688ace 100644 --- a/src/xenia/cpu/backend/a64/a64_seq_vector.cc +++ b/src/xenia/cpu/backend/a64/a64_seq_vector.cc @@ -8,6 +8,7 @@ */ #include "xenia/cpu/backend/a64/a64_sequences.h" +#include "xenia/cpu/backend/a64/a64_util.h" #include #include @@ -1026,12 +1027,7 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32); struct SPLAT_I8 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { - if (i.src1.constant() <= 0xFF) { - e.MOVI(i.dest.reg().B16(), i.src1.constant()); - return; - } - e.MOV(W0, i.src1.constant()); - e.DUP(i.dest.reg().B16(), W0); + e.MOVI(i.dest.reg().B16(), i.src1.constant()); } else { e.DUP(i.dest.reg().B16(), i.src1); } @@ -1040,9 +1036,12 @@ struct SPLAT_I8 : Sequence> { struct SPLAT_I16 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { - if (i.src1.constant() <= 0xFF) { + if ((i.src1.constant() & 0xFF'00) == 0) { e.MOVI(i.dest.reg().H8(), i.src1.constant()); return; + } else if ((i.src1.constant() & 0x00'FF) == 0) { + e.MOVI(i.dest.reg().H8(), i.src1.constant(), oaknut::util::LSL, 8); + return; } e.MOV(W0, i.src1.constant()); e.DUP(i.dest.reg().H8(), W0); @@ -1054,9 +1053,22 @@ struct SPLAT_I16 : Sequence> { struct SPLAT_I32 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { - if (i.src1.constant() <= 0xFF) { + oaknut::FImm8 fp8(0); + if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) { + e.FMOV(i.dest.reg().S4(), fp8); + return; + } else if ((i.src1.constant() & 0xFF'FF'FF'00) == 0) { e.MOVI(i.dest.reg().S4(), i.src1.constant()); return; + } else if ((i.src1.constant() & 0xFF'FF'00'FF) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 8); + return; 
+ } else if ((i.src1.constant() & 0xFF'00'FF'FF) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 16); + return; + } else if ((i.src1.constant() & 0x00'FF'FF'FF) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 24); + return; } e.MOV(W0, i.src1.constant()); e.DUP(i.dest.reg().S4(), W0); @@ -1068,8 +1080,24 @@ struct SPLAT_I32 : Sequence> { struct SPLAT_F32 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { - if (i.src1.value->constant.i32 <= 0xFF) { - e.MOVI(i.dest.reg().S4(), i.src1.value->constant.i32); + oaknut::FImm8 fp8(0); + if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) { + e.FMOV(i.dest.reg().S4(), fp8); + return; + } else if ((i.src1.value->constant.u32 & 0xFF'FF'FF'00) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32); + return; + } else if ((i.src1.value->constant.u32 & 0xFF'FF'00'FF) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL, + 8); + return; + } else if ((i.src1.value->constant.u32 & 0xFF'00'FF'FF) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL, + 16); + return; + } else if ((i.src1.value->constant.u32 & 0x00'FF'FF'FF) == 0) { + e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL, + 24); return; } e.MOV(W0, i.src1.value->constant.i32); diff --git a/src/xenia/cpu/backend/a64/a64_util.h b/src/xenia/cpu/backend/a64/a64_util.h index e3a34ac00d..0b950b8ae0 100644 --- a/src/xenia/cpu/backend/a64/a64_util.h +++ b/src/xenia/cpu/backend/a64/a64_util.h @@ -17,7 +17,77 @@ namespace xe { namespace cpu { namespace backend { -namespace a64 {} // namespace a64 +namespace a64 { + +// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV +// returns false if the value cannot be represented +// C2.2.3 Modified immediate constants in A64 floating-point instructions +// abcdefgh +// V +// aBbbbbbc defgh000 00000000 00000000 +// B = NOT(b) +constexpr 
bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) { + const uint32_t sign = (u32 >> 31) & 1; + int32_t exp = ((u32 >> 23) & 0xff) - 127; + int64_t mantissa = u32 & 0x7fffff; + + // Too many mantissa bits + if (mantissa & 0x7ffff) { + return false; + } + // Too many exp bits + if (exp < -3 || exp > 4) { + return false; + } + + // mantissa = (16 + e:f:g:h) / 16. + mantissa >>= 19; + if ((mantissa & 0b1111) != mantissa) { + return false; + } + + // exp = (NOT(b):c:d) - 3 + exp = ((exp + 3) & 0b111) ^ 0b100; + + fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa)); + return true; +} + +// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV +// returns false if the value cannot be represented +// C2.2.3 Modified immediate constants in A64 floating-point instructions +// abcdefgh +// V +// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000 +// B = NOT(b) +constexpr bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) { + const uint32_t sign = (u64 >> 63) & 1; + int32_t exp = ((u64 >> 52) & 0x7ff) - 1023; + int64_t mantissa = u64 & 0xfffffffffffffULL; + + // Too many mantissa bits + if (mantissa & 0xffffffffffffULL) { + return false; + } + // Too many exp bits + if (exp < -3 || exp > 4) { + return false; + } + + // mantissa = (16 + e:f:g:h) / 16. + mantissa >>= 48; + if ((mantissa & 0b1111) != mantissa) { + return false; + } + + // exp = (NOT(b):c:d) - 3 + exp = ((exp + 3) & 0b111) ^ 0b100; + + fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa)); + return true; +} + +} // namespace a64 } // namespace backend } // namespace cpu } // namespace xe