Skip to content

Commit

Permalink
[a64] Optimize OPCODE_SPLAT byte-constants
Browse files Browse the repository at this point in the history
Byte-sized constants can utilize the `MOVI` instruction. This makes
many cases, such as zero-splats, much faster, since the splat then encodes
as just a register rename (similar to `xor` on x64).
  • Loading branch information
Wunkolo committed Jun 23, 2024
1 parent 3acd0a3 commit 539a03d
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/xenia/cpu/backend/a64/a64_seq_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,10 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);
struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.constant() <= 0xFF) {
e.MOVI(i.dest.reg().B16(), i.src1.constant());
return;
}
e.MOV(W0, i.src1.constant());
e.DUP(i.dest.reg().B16(), W0);
} else {
Expand All @@ -1036,6 +1040,10 @@ struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.constant() <= 0xFF) {
e.MOVI(i.dest.reg().H8(), i.src1.constant());
return;
}
e.MOV(W0, i.src1.constant());
e.DUP(i.dest.reg().H8(), W0);
} else {
Expand All @@ -1046,6 +1054,10 @@ struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.constant() <= 0xFF) {
e.MOVI(i.dest.reg().S4(), i.src1.constant());
return;
}
e.MOV(W0, i.src1.constant());
e.DUP(i.dest.reg().S4(), W0);
} else {
Expand All @@ -1056,6 +1068,10 @@ struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
struct SPLAT_F32 : Sequence<SPLAT_F32, I<OPCODE_SPLAT, V128Op, F32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
if (i.src1.is_constant) {
if (i.src1.value->constant.i32 <= 0xFF) {
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.i32);
return;
}
e.MOV(W0, i.src1.value->constant.i32);
e.DUP(i.dest.reg().S4(), W0);
} else {
Expand Down

0 comments on commit 539a03d

Please sign in to comment.