Skip to content

Commit

Permalink
Merge pull request #281 from SChernykh/fix-x18
Browse files Browse the repository at this point in the history
ARM64 JIT: don't use `x18` register
  • Loading branch information
SChernykh authored Oct 19, 2023
2 parents 2777910 + f72101a commit d3c9648
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 77 deletions.
54 changes: 27 additions & 27 deletions src/jit_compiler_a64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
Expand All @@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
}

// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);

// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos);

// and w18, w18, CacheLineAlignMask
// and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);

// and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
Expand All @@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
Expand All @@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
}

// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);

// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
Expand Down Expand Up @@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
}
else
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k);

// add dst, src, tmp_reg
Expand Down Expand Up @@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos;

uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 19;

imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
emitAddImmediate(tmp_reg, src, imm, code, k);
Expand Down Expand Up @@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);

// add dst, dst, tmp_reg
Expand Down Expand Up @@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);

// sub dst, dst, tmp_reg
Expand All @@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)

if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}

Expand All @@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);

// sub dst, dst, tmp_reg
Expand Down Expand Up @@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);

// umulh dst, dst, tmp_reg
Expand Down Expand Up @@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);

// smulh dst, dst, tmp_reg
Expand All @@ -692,7 +692,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)

uint32_t k = codePos;

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint64_t N = 1ULL << 63;
Expand All @@ -711,9 +711,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);

if (literal_id < 13)
if (literal_id < 12)
{
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };

// mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
Expand Down Expand Up @@ -751,7 +751,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)

if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}

Expand All @@ -769,7 +769,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);

// eor dst, dst, tmp_reg
Expand Down Expand Up @@ -807,7 +807,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)

if (src != dst)
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;

// sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
Expand Down Expand Up @@ -835,7 +835,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)

uint32_t k = codePos;

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
Expand Down Expand Up @@ -984,7 +984,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)

const uint32_t src = IntRegMap[instr.src];

constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8;

// ror tmp_reg, src, imm
Expand All @@ -1008,7 +1008,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)

const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;

uint32_t imm = instr.getImm32();

Expand Down
Loading

0 comments on commit d3c9648

Please sign in to comment.