From c6fcd524bb7716aa2e708c4e75ab6286ce7451fb Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 13 Dec 2024 18:35:58 -0300 Subject: [PATCH] Revamp peephole pass --- llvm/lib/Target/SBF/SBFMIPeephole.cpp | 191 +----------------- llvm/lib/Target/SBF/SBFTargetMachine.cpp | 8 +- .../CodeGen/SBF/32-bit-subreg-cond-select.ll | 4 +- llvm/test/CodeGen/SBF/atomics_sbf.ll | 24 +-- .../test/CodeGen/SBF/peephole-explict-sext.ll | 12 ++ 5 files changed, 37 insertions(+), 202 deletions(-) create mode 100644 llvm/test/CodeGen/SBF/peephole-explict-sext.ll diff --git a/llvm/lib/Target/SBF/SBFMIPeephole.cpp b/llvm/lib/Target/SBF/SBFMIPeephole.cpp index e88bad1e46c396..0a84933ebababd 100644 --- a/llvm/lib/Target/SBF/SBFMIPeephole.cpp +++ b/llvm/lib/Target/SBF/SBFMIPeephole.cpp @@ -51,12 +51,6 @@ struct SBFMIPeephole : public MachineFunctionPass { private: // Initialize class variables. void initialize(MachineFunction &MFParm); - - bool isCopyFrom32Def(MachineInstr *CopyMI); - bool isInsnFrom32Def(MachineInstr *DefInsn); - bool isPhiFrom32Def(MachineInstr *MovMI); - bool isMovFrom32Def(MachineInstr *MovMI); - bool eliminateZExtSeq(); bool eliminateZExt(); std::set PhiInsns; @@ -70,12 +64,7 @@ struct SBFMIPeephole : public MachineFunctionPass { initialize(MF); - // First try to eliminate (zext, lshift, rshift) and then - // try to eliminate zext. - bool ZExtSeqExist, ZExtExist; - ZExtSeqExist = eliminateZExtSeq(); - ZExtExist = eliminateZExt(); - return ZExtSeqExist || ZExtExist; + return eliminateZExt(); } }; @@ -87,159 +76,6 @@ void SBFMIPeephole::initialize(MachineFunction &MFParm) { LLVM_DEBUG(dbgs() << "*** SBF MachineSSA ZEXT Elim peephole pass ***\n\n"); } -bool SBFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI) -{ - MachineOperand &opnd = CopyMI->getOperand(1); - - if (!opnd.isReg()) - return false; - - // Return false if getting value from a 32bit physical register. - // Most likely, this physical register is aliased to - // function call return value or current function parameters. - Register Reg = opnd.getReg(); - if (!Reg.isVirtual()) - return false; - - if (MRI->getRegClass(Reg) == &SBF::GPRRegClass) - return false; - - MachineInstr *DefInsn = MRI->getVRegDef(Reg); - if (!isInsnFrom32Def(DefInsn)) - return false; - - return true; -} - -bool SBFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI) -{ - for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) { - MachineOperand &opnd = PhiMI->getOperand(i); - - if (!opnd.isReg()) - return false; - - MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); - if (!PhiDef) - return false; - if (PhiDef->isPHI()) { - if (PhiInsns.find(PhiDef) != PhiInsns.end()) - return false; - PhiInsns.insert(PhiDef); - if (!isPhiFrom32Def(PhiDef)) - return false; - } - if (PhiDef->getOpcode() == SBF::COPY && !isCopyFrom32Def(PhiDef)) - return false; - } - - return true; -} - -// The \p DefInsn instruction defines a virtual register. -bool SBFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn) -{ - if (!DefInsn) - return false; - - if (DefInsn->isPHI()) { - if (PhiInsns.find(DefInsn) != PhiInsns.end()) - return false; - PhiInsns.insert(DefInsn); - if (!isPhiFrom32Def(DefInsn)) - return false; - } else if (DefInsn->getOpcode() == SBF::COPY) { - if (!isCopyFrom32Def(DefInsn)) - return false; - } - - return true; -} - -bool SBFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) -{ - MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg()); - - LLVM_DEBUG(dbgs() << " Def of Mov Src:"); - LLVM_DEBUG(DefInsn->dump()); - - PhiInsns.clear(); - if (!isInsnFrom32Def(DefInsn)) - return false; - - LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n"); - - return true; -} - -bool SBFMIPeephole::eliminateZExtSeq() { - MachineInstr* ToErase = nullptr; - bool Eliminated = false; - - for (MachineBasicBlock &MBB : *MF) { - for (MachineInstr &MI : MBB) { - // If the previous instruction was marked for elimination, remove it now. - if (ToErase) { - ToErase->eraseFromParent(); - ToErase = nullptr; - } - - // Eliminate the 32-bit to 64-bit zero extension sequence when possible. - // - // MOV_32_64 rB, wA - // SLL_ri rB, rB, 32 - // SRL_ri rB, rB, 32 - if (MI.getOpcode() == SBF::SRL_ri && - MI.getOperand(2).getImm() == 32) { - Register DstReg = MI.getOperand(0).getReg(); - Register ShfReg = MI.getOperand(1).getReg(); - MachineInstr *SllMI = MRI->getVRegDef(ShfReg); - - LLVM_DEBUG(dbgs() << "Starting SRL found:"); - LLVM_DEBUG(MI.dump()); - - if (!SllMI || - SllMI->isPHI() || - SllMI->getOpcode() != SBF::SLL_ri || - SllMI->getOperand(2).getImm() != 32) - continue; - - LLVM_DEBUG(dbgs() << " SLL found:"); - LLVM_DEBUG(SllMI->dump()); - - MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg()); - if (!MovMI || - MovMI->isPHI() || - MovMI->getOpcode() != SBF::MOV_32_64) - continue; - - LLVM_DEBUG(dbgs() << " Type cast Mov found:"); - LLVM_DEBUG(MovMI->dump()); - - Register SubReg = MovMI->getOperand(1).getReg(); - if (!isMovFrom32Def(MovMI)) { - LLVM_DEBUG(dbgs() - << " One ZExt elim sequence failed qualifying elim.\n"); - continue; - } - - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(SBF::SUBREG_TO_REG), DstReg) - .addImm(0).addReg(SubReg).addImm(SBF::sub_32); - - SllMI->eraseFromParent(); - MovMI->eraseFromParent(); - // MI is the right shift, we can't erase it in it's own iteration. - // Mark it to ToErase, and erase in the next iteration. - ToErase = &MI; - ZExtElemNum++; - Eliminated = true; - } - } - } - - return Eliminated; -} - bool SBFMIPeephole::eliminateZExt() { MachineInstr* ToErase = nullptr; bool Eliminated = false; @@ -252,23 +88,15 @@ bool SBFMIPeephole::eliminateZExt() { ToErase = nullptr; } - if (MI.getOpcode() != SBF::MOV_32_64) + if (MI.getOpcode() != SBF::MOV_32_64_no_sext) continue; // Eliminate MOV_32_64 if possible. // MOV_32_64 rA, wB - // - // If wB has been zero extended, replace it with a SUBREG_TO_REG. - // This is to workaround SBF programs where pkt->{data, data_end} - // is encoded as u32, but actually the verifier populates them - // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits. - LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:"); + LLVM_DEBUG(dbgs() << "Candidate MOV_32_64_no_sext instruction:"); LLVM_DEBUG(MI.dump()); - if (!isMovFrom32Def(&MI)) - continue; - - LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n"); + LLVM_DEBUG(dbgs() << "Removing the MOV_32_64_no_sext instruction\n"); Register dst = MI.getOperand(0).getReg(); Register src = MI.getOperand(1).getReg(); @@ -351,14 +179,11 @@ bool SBFMIPreEmitPeephole::eliminateRedundantMov() { // Eliminate identical move: // // MOV rA, rA - // - // Note that we cannot remove - // MOV_32_64 rA, wA - // MOV_rr_32 wA, wA - // as these two instructions having side effects, zeroing out - // top 32 bits of rA. + // MOV wA, wA unsigned Opcode = MI.getOpcode(); - if (Opcode == SBF::MOV_rr) { + if (Opcode == SBF::MOV_rr || + Opcode == SBF::MOV_rr_32_no_sext_v2 || + Opcode == SBF::MOV_32_64_no_sext) { Register dst = MI.getOperand(0).getReg(); Register src = MI.getOperand(1).getReg(); diff --git a/llvm/lib/Target/SBF/SBFTargetMachine.cpp b/llvm/lib/Target/SBF/SBFTargetMachine.cpp index ced3cd528d8957..3c45f983231484 100644 --- a/llvm/lib/Target/SBF/SBFTargetMachine.cpp +++ b/llvm/lib/Target/SBF/SBFTargetMachine.cpp @@ -158,12 +158,10 @@ void SBFPassConfig::addMachineSSAOptimization() { // Peephole ran at last. TargetPassConfig::addMachineSSAOptimization(); -// const SBFSubtarget *Subtarget = getSBFTargetMachine().getSubtargetImpl(); + const SBFSubtarget *Subtarget = getSBFTargetMachine().getSubtargetImpl(); if (!DisableMIPeephole) { -// TODO: The peephole doesn't work with explicit sign extension. A future PR -// will revamp the implementation. -// if (Subtarget->getHasAlu32()) -// addPass(createSBFMIPeepholePass()); + if (Subtarget->getHasAlu32() && Subtarget->getHasExplicitSignExt()) + addPass(createSBFMIPeepholePass()); addPass(createSBFMIPeepholeTruncElimPass()); } } diff --git a/llvm/test/CodeGen/SBF/32-bit-subreg-cond-select.ll b/llvm/test/CodeGen/SBF/32-bit-subreg-cond-select.ll index af88048ccf37f7..7cf5b774631dd2 100644 --- a/llvm/test/CodeGen/SBF/32-bit-subreg-cond-select.ll +++ b/llvm/test/CodeGen/SBF/32-bit-subreg-cond-select.ll @@ -56,8 +56,8 @@ entry: ret i32 %c.d } ; CHECK-LABEL: select_cc_32 -; CHECK: mov64 r{{[0-9]+}}, w{{[0-9]+}} -; CHECK: mov64 r{{[0-9]+}}, w{{[0-9]+}} +; CHECK-NOT: mov64 r{{[0-9]+}}, w{{[0-9]+}} +; CHECK-NOT: mov64 r{{[0-9]+}}, w{{[0-9]+}} ; CHECK: jgt r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NOT: lsh64 r{{[0-9]+}}, 32 ; CHECK-NOT: rsh64 r{{[0-9]+}}, 32 diff --git a/llvm/test/CodeGen/SBF/atomics_sbf.ll b/llvm/test/CodeGen/SBF/atomics_sbf.ll index 7c1b599e63b78c..f881b1c51f8824 100644 --- a/llvm/test/CodeGen/SBF/atomics_sbf.ll +++ b/llvm/test/CodeGen/SBF/atomics_sbf.ll @@ -67,9 +67,9 @@ entry: ; CHECK-LABEL: test_cas_32 ; CHECK: ldxw w0, [r1 + 0] -; CHECK: mov64 r4, w0 -; CHECK: mov64 r2, w2 -; CHECK: jeq r4, r2, +; CHECK-NOT: mov64 r4, w0 +; CHECK-NOT: mov64 r2, w2 +; CHECK: jeq r0, r2, ; CHECK: mov64 w3, w0 ; CHECK: stxw [r1 + 0], w3 define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr { @@ -235,10 +235,10 @@ entry: ; CHECK-LABEL: test_umin_32 ; CHECK: ldxw w0, [r1 + 0] -; CHECK: mov64 r4, w2 -; CHECK: mov64 r5, w0 +; CHECK-NOT: mov64 r4, w2 +; CHECK-NOT: mov64 r5, w0 ; CHECK: mov64 w3, w0 -; CHECK: jgt r4, r5, +; CHECK: jgt r2, r0, ; CHECK: mov64 w3, w2 ; CHECK: stxw [r1 + 0], w3 define dso_local i32 @test_umin_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 { @@ -261,10 +261,10 @@ entry: ; CHECK-LABEL: test_umax_32 ; CHECK: ldxw w0, [r1 + 0] -; CHECK: mov64 r4, w0 -; CHECK: mov64 r5, w2 +; CHECK-NOT: mov64 r4, w0 +; CHECK-NOT: mov64 r4, w2 ; CHECK: mov64 w3, w0 -; CHECK: jgt r4, r5 +; CHECK: jgt r0, r2 ; CHECK: mov64 w3, w2 ; CHECK: stxw [r1 + 0], w3 define dso_local i32 @test_umax_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 { @@ -330,9 +330,9 @@ entry: ; CHECK-LABEL: test_weak_cas_32 ; CHECK: ldxw w4, [r1 + 0] -; CHECK: mov64 r5, w4 -; CHECK: mov64 r2, w2 -; CHECK: jeq r5, r2, +; CHECK-NOT: mov64 r5, w4 +; CHECK-NOT: mov64 r2, w2 +; CHECK: jeq r4, r2, ; CHECK: stxw [r1 + 0], w3 define dso_local void @test_weak_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr { entry: diff --git a/llvm/test/CodeGen/SBF/peephole-explict-sext.ll b/llvm/test/CodeGen/SBF/peephole-explict-sext.ll new file mode 100644 index 00000000000000..452c4cf86eac98 --- /dev/null +++ b/llvm/test/CodeGen/SBF/peephole-explict-sext.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=sbf -mattr=+alu32,+explicit-sext | FileCheck %s + +define dso_local i32 @select_cc_32(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: +; CHECK-LABEL: select_cc_32 + %cmp = icmp ugt i32 %a, %b +; CHECK-NOT: mov64 r{{[0-9]+}}, w1 +; CHECK-NOT: mov64 r{{[0-9]+}}, w2 +; CHECK: jgt r1, r2, + %c.d = select i1 %cmp, i32 %c, i32 %d + ret i32 %c.d +}