From cde211c141b0ca717edcc0f4ddf04b892d9511d1 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Wed, 12 Jun 2024 14:41:34 -0700 Subject: [PATCH] [a64] Optimize `OPCODE_SPLAT` byte-constants Byte-sized constants can utilize the `MOVI` instructions. This makes many cases such as zero-splats much faster since this encodes as just a register-rename (similar to `xor` on x64). --- src/xenia/cpu/backend/a64/a64_seq_vector.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/xenia/cpu/backend/a64/a64_seq_vector.cc b/src/xenia/cpu/backend/a64/a64_seq_vector.cc index 43c420b10bd..6828437235a 100644 --- a/src/xenia/cpu/backend/a64/a64_seq_vector.cc +++ b/src/xenia/cpu/backend/a64/a64_seq_vector.cc @@ -1026,6 +1026,10 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32); struct SPLAT_I8 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { + if (i.src1.constant() <= 0xFF) { + e.MOVI(i.dest.reg().B16(), i.src1.constant()); + return; + } e.MOV(W0, i.src1.constant()); e.DUP(i.dest.reg().B16(), W0); } else { @@ -1036,6 +1040,10 @@ struct SPLAT_I8 : Sequence> { struct SPLAT_I16 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { + if (i.src1.constant() <= 0xFF) { + e.MOVI(i.dest.reg().H8(), i.src1.constant()); + return; + } e.MOV(W0, i.src1.constant()); e.DUP(i.dest.reg().H8(), W0); } else { @@ -1046,6 +1054,10 @@ struct SPLAT_I16 : Sequence> { struct SPLAT_I32 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { + if (i.src1.constant() <= 0xFF) { + e.MOVI(i.dest.reg().S4(), i.src1.constant()); + return; + } e.MOV(W0, i.src1.constant()); e.DUP(i.dest.reg().S4(), W0); } else { @@ -1056,6 +1068,10 @@ struct SPLAT_I32 : Sequence> { struct SPLAT_F32 : Sequence> { static void Emit(A64Emitter& e, const EmitArgType& i) { if (i.src1.is_constant) { + if (i.src1.value->constant.i32 <= 0xFF) { + e.MOVI(i.dest.reg().S4(), 
i.src1.value->constant.i32); + return; + } e.MOV(W0, i.src1.value->constant.i32); e.DUP(i.dest.reg().S4(), W0); } else {