Skip to content

Commit

Permalink
[SOL] Fix ALU32 instructions with explicit sign extension (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasSte authored Dec 6, 2024
1 parent f602770 commit f082558
Show file tree
Hide file tree
Showing 8 changed files with 227 additions and 58 deletions.
45 changes: 35 additions & 10 deletions llvm/lib/Target/SBF/SBFISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ SBFTargetLowering::SBFTargetLowering(const TargetMachine &TM,

if (STI.getHasAlu32()) {
setOperationAction(ISD::BSWAP, MVT::i32, Promote);
setOperationAction(ISD::BR_CC, MVT::i32, Promote);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
}

setOperationAction(ISD::CTTZ, MVT::i64, Expand);
Expand Down Expand Up @@ -763,6 +767,30 @@ SDValue SBFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (!getHasJmpExt())
NegateCC(LHS, RHS, CC);

bool IsSignedCmp = (CC == ISD::SETGT ||
CC == ISD::SETGE ||
CC == ISD::SETLT ||
CC == ISD::SETLE);
bool Is32Num = LHS.getValueType() == MVT::i32 ||
RHS.getValueType() == MVT::i32;

if (getHasAlu32() && Is32Num) {
if (isIntOrFPConstant(RHS) || isIntOrFPConstant(LHS)) {
// Immediate values are sign extended in SBF, so we sign extend the
// registers for a correct comparison.
LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, LHS);
RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, RHS);
} else if (IsSignedCmp) {
// If the comparison is signed, we sign extend registers
LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, LHS);
RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, RHS);
} else {
// If the comparison is unsigned, we zero extend registers
LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, LHS);
RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RHS);
}
}

return DAG.getNode(SBFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
DAG.getConstant(CC, DL, MVT::i64), Dest);
}
Expand Down Expand Up @@ -941,7 +969,7 @@ SBFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
if (!isSigned) {
unsigned MovOp =
Subtarget->getHasExplicitSignExt()
? SBF::MOV_rr : SBF::MOV_32_64;
? SBF::MOV_32_64_no_sext : SBF::MOV_32_64;
Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
BuildMI(BB, DL, TII.get(MovOp), PromotedReg0).addReg(Reg);
return PromotedReg0;
Expand Down Expand Up @@ -1087,15 +1115,12 @@ SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
CC == ISD::SETLT ||
CC == ISD::SETLE);

// eBPF at the moment only has 64-bit comparison. Any 32-bit comparison need
// to be promoted, however if the 32-bit comparison operands are destination
// registers then they are implicitly zero-extended already, there is no
// need of explicit zero-extend sequence for them.
//
// We simply do extension for all situations in this method, but we will
// try to remove those unnecessary in SBFMIPeephole pass.
// SBF at the moment only has 64-bit comparison. Any 32-bit comparison needs
// to be promoted. If we are comparing against an immediate value, we must
// sign extend the registers. Likewise for signed comparisons. Unsigned
// comparisons will zero extend registers.
if (is32BitCmp)
LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp || !isSelectRROp);

if (isSelectRROp) {
Register RHS = MI.getOperand(2).getReg();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SBF/SBFInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,12 @@ let isCodeGenOnly = 1 in {
def MOV_32_64_addr : MATH_RI<SBF_ALU, SBF_MOV,
(outs GPR:$dst), (ins u64imm:$imm),
"mov32 $dst, $imm", []>, Requires<[SBFNoLddw]>;

// 64-bit-class register move (SBF_ALU64 / SBF_MOV) that reads a 32-bit
// register (GPR32) and writes a 64-bit register (GPR). It is printed as
// `mov64 $dst, $src`.
// The selection pattern list is empty, so instruction selection never picks
// this def on its own; it has to be built explicitly from C++ lowering code.
// NOTE(review): the name suggests the upper 32 bits are NOT sign extended by
// this move — confirm against the SBF ISA semantics for explicit sign
// extension.
def MOV_32_64_no_sext : MATH_RR<SBF_ALU64, SBF_MOV,
(outs GPR:$dst),
(ins GPR32:$src),
"mov64 $dst, $src",
[]>;
}

let DecoderNamespace = "SBFv2", Predicates = [SBFNoLddw] in {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/SBF/SBFSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ SBFSubtarget &SBFSubtarget::initializeSubtargetDependencies(const Triple &TT,
void SBFSubtarget::initializeEnvironment(const Triple &TT) {
assert(TT.getArch() == Triple::sbf && "expected Triple::sbf");
HasJmpExt = false;
HasAlu32 = false;
UseDwarfRIS = false;

// SBFv2 features
Expand All @@ -49,6 +48,8 @@ void SBFSubtarget::initializeEnvironment(const Triple &TT) {
HasPqrClass = false;
NewCallConvention = false;
HasStoreImm = false;
HasAlu32 = false;
HasExplicitSignExt = false;
}

void SBFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/SBF/SBFTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,12 @@ void SBFPassConfig::addMachineSSAOptimization() {
// Peephole ran at last.
TargetPassConfig::addMachineSSAOptimization();

const SBFSubtarget *Subtarget = getSBFTargetMachine().getSubtargetImpl();
// const SBFSubtarget *Subtarget = getSBFTargetMachine().getSubtargetImpl();
if (!DisableMIPeephole) {
if (Subtarget->getHasAlu32())
addPass(createSBFMIPeepholePass());
// TODO: The peephole doesn't work with explicit sign extension. A future PR
// will revamp the implementation.
// if (Subtarget->getHasAlu32())
// addPass(createSBFMIPeepholePass());
addPass(createSBFMIPeepholeTruncElimPass());
}
}
Expand Down
16 changes: 12 additions & 4 deletions llvm/test/CodeGen/SBF/32-bit-subreg-cond-select.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -O2 -march=sbf -mattr=+alu32 < %s | FileCheck %s
; RUN: llc -O2 -march=sbf -mattr=+alu32,+explicit-sext -verify-machineinstrs < %s | FileCheck %s
;
; unsigned int select_cc_32 (unsigned a, unsigned b, int c, int d)
; {
Expand Down Expand Up @@ -56,19 +56,23 @@ entry:
ret i32 %c.d
}
; CHECK-LABEL: select_cc_32
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: mov64 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: mov64 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jgt r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
entry:
%cmp = icmp ugt i32 %a, %b
%cmp = icmp sgt i32 %a, %b
%c.d = select i1 %cmp, i64 %c, i64 %d
ret i64 %c.d
}
; CHECK-LABEL: select_cc_32_64
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jsgt r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

Expand All @@ -80,6 +84,7 @@ entry:
ret i32 %c.d
}
; CHECK-LABEL: select_cc_64_32
; CHECK: jsgt r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32

; Function Attrs: norecurse nounwind readnone
Expand All @@ -91,18 +96,20 @@ entry:
}
; CHECK-LABEL: selecti_cc_32
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jgt r{{[0-9]+}}, 10
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 {
entry:
%cmp = icmp ugt i32 %a, 11
%cmp = icmp sgt i32 %a, 11
%c.d = select i1 %cmp, i64 %c, i64 %d
ret i64 %c.d
}
; CHECK-LABEL: selecti_cc_32_64
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jsgt r{{[0-9]+}}, 11,
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

Expand All @@ -114,4 +121,5 @@ entry:
ret i32 %c.d
}
; CHECK-LABEL: selecti_cc_64_32
; CHECK: jsgt r{{[0-9]+}}, 12
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
76 changes: 37 additions & 39 deletions llvm/test/CodeGen/SBF/atomics_sbf.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=sbf -mcpu=v3 -verify-machineinstrs | tee -i /tmp/log | FileCheck %s
; RUN: llc < %s -march=sbf -mattr=+alu32,+explicit-sext -verify-machineinstrs | tee -i /tmp/log | FileCheck %s
;
; CHECK-LABEL: test_load_add_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: add32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -25,7 +25,7 @@ entry:

; CHECK-LABEL: test_load_sub_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: sub32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand Down Expand Up @@ -67,8 +67,10 @@ entry:

; CHECK-LABEL: test_cas_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: jeq r0, r2,
; CHECK: mov32 w3, w0
; CHECK: mov64 r4, w0
; CHECK: mov64 r2, w2
; CHECK: jeq r4, r2,
; CHECK: mov64 w3, w0
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
entry:
Expand All @@ -91,7 +93,7 @@ entry:

; CHECK-LABEL: test_load_and_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: and32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_and_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -113,7 +115,7 @@ entry:

; CHECK-LABEL: test_load_nand_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: and32 w3, w2
; CHECK: xor32 w3, -1
; CHECK: stxw [r1 + 0], w3
Expand All @@ -137,7 +139,7 @@ entry:

; CHECK-LABEL: test_load_or_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: or32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_or_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -159,7 +161,7 @@ entry:

; CHECK-LABEL: test_load_xor_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: xor32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_xor_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -181,15 +183,11 @@ entry:

; CHECK-LABEL: test_min_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov64 r4, r0
; CHECK: lsh64 r4, 32
; CHECK: arsh64 r4, 32
; CHECK: mov32 r5, w2
; CHECK: lsh64 r5, 32
; CHECK: arsh64 r5, 32
; CHECK: mov32 w3, w0
; CHECK: jslt r4, r5, LBB16_2
; CHECK: mov32 w3, w2
; CHECK: mov32 r4, w2
; CHECK: mov32 r5, w0
; CHECK: mov64 w3, w0
; CHECK: jsgt r4, r5, LBB16_2
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_min_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand All @@ -200,7 +198,7 @@ entry:
; CHECK-LABEL: test_min_64
; CHECK: ldxdw r0, [r1 + 0]
; CHECK: mov64 r3, r0
; CHECK: jslt r0, r2,
; CHECK: jsgt r2, r0,
; CHECK: mov64 r3, r2
; CHECK: stxdw [r1 + 0], r3
define dso_local i64 @test_min_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
Expand All @@ -211,15 +209,11 @@ entry:

; CHECK-LABEL: test_max_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov64 r4, r0
; CHECK: lsh64 r4, 32
; CHECK: arsh64 r4, 32
; CHECK: mov32 r4, w0
; CHECK: mov32 r5, w2
; CHECK: lsh64 r5, 32
; CHECK: arsh64 r5, 32
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: jsgt r4, r5, LBB18_2
; CHECK: mov32 w3, w2
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_max_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand All @@ -241,10 +235,11 @@ entry:

; CHECK-LABEL: test_umin_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 r4, w2
; CHECK: mov32 w3, w0
; CHECK: jlt r0, r4,
; CHECK: mov32 w3, w2
; CHECK: mov64 r4, w2
; CHECK: mov64 r5, w0
; CHECK: mov64 w3, w0
; CHECK: jgt r4, r5,
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_umin_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand All @@ -255,7 +250,7 @@ entry:
; CHECK-LABEL: test_umin_64
; CHECK: ldxdw r0, [r1 + 0]
; CHECK: mov64 r3, r0
; CHECK: jlt r0, r2,
; CHECK: jgt r2, r0,
; CHECK: mov64 r3, r2
; CHECK: stxdw [r1 + 0], r3
define dso_local i64 @test_umin_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
Expand All @@ -266,10 +261,11 @@ entry:

; CHECK-LABEL: test_umax_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 r4, w2
; CHECK: mov32 w3, w0
; CHECK: jgt r0, r4,
; CHECK: mov32 w3, w2
; CHECK: mov64 r4, w0
; CHECK: mov64 r5, w2
; CHECK: mov64 w3, w0
; CHECK: jgt r4, r5
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_umax_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand Down Expand Up @@ -305,8 +301,9 @@ entry:
; CHECK-LABEL: test_load_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w2, 0
; CHECK: jeq r0, 0, LBB25_2
; CHECK: mov32 w2, w0
; CHECK: mov32 r3, w0
; CHECK: jeq r3, 0, LBB25_2
; CHECK: mov64 w2, w0
; CHECK: LBB25_2:
; CHECK: stxw [r1 + 0], w2
define dso_local i32 @test_load_32(ptr nocapture %p) local_unnamed_addr {
Expand All @@ -333,8 +330,9 @@ entry:

; CHECK-LABEL: test_weak_cas_32
; CHECK: ldxw w4, [r1 + 0]
; CHECK: mov32 r2, w2
; CHECK: jeq r4, r2,
; CHECK: mov64 r5, w4
; CHECK: mov64 r2, w2
; CHECK: jeq r5, r2,
; CHECK: stxw [r1 + 0], w3
define dso_local void @test_weak_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
entry:
Expand Down
Loading

0 comments on commit f082558

Please sign in to comment.