Skip to content

Commit

Permalink
Update the JIT to support rewriting more complex intrinsics as user c…
Browse files Browse the repository at this point in the history
…alls (#102702)

* Update the JIT to support rewriting more complex intrinsics as user calls

* Updating the shuffle hwintrinsic to always be imported as an intrinsic

* Ensure multi-reg returns are initialized for rewritten hwintrinsics

* Apply suggestions from code review

Co-authored-by: Jakob Botsch Nielsen <[email protected]>

* Adding function headers to SetMethodHandle and SetEntryPoint

* Apply formatting patch

---------

Co-authored-by: Jakob Botsch Nielsen <[email protected]>
  • Loading branch information
tannergooding and jakobbotsch authored May 29, 2024
1 parent eaa10eb commit ec5963d
Show file tree
Hide file tree
Showing 9 changed files with 418 additions and 100 deletions.
83 changes: 79 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4128,6 +4128,8 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
// first tree to be evaluated, and "lvl2" - the second.
if (multiOp->IsReverseOp())
{
assert(!multiOp->AsHWIntrinsic()->IsUserCall());

level = gtSetEvalOrder(multiOp->Op(2));
lvl2 = gtSetEvalOrder(multiOp->Op(1));
}
Expand All @@ -4140,11 +4142,18 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
// We want the more complex tree to be evaluated first.
if (level < lvl2)
{
bool canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1))
: gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2));
bool canSwap = false;

if (!multiOp->AsHWIntrinsic()->IsUserCall())
{
canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1))
: gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2));
}

if (canSwap)
{
assert(!multiOp->AsHWIntrinsic()->IsUserCall());

if (multiOp->IsReverseOp())
{
multiOp->ClearReverseOp();
Expand Down Expand Up @@ -6563,7 +6572,7 @@ bool GenTree::OperSupportsReverseOpEvalOrder(Compiler* comp) const
#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)
if (OperIsMultiOp())
{
return AsMultiOp()->GetOperandCount() == 2;
return (AsMultiOp()->GetOperandCount() == 2) && !AsMultiOp()->IsUserCall();
}
#endif // FEATURE_SIMD || FEATURE_HW_INTRINSICS
return false;
Expand Down Expand Up @@ -9711,6 +9720,11 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
tree->AsHWIntrinsic()->GetHWIntrinsicId(),
tree->AsHWIntrinsic()->GetSimdBaseJitType(), tree->AsHWIntrinsic()->GetSimdSize());
copy->AsHWIntrinsic()->SetAuxiliaryJitType(tree->AsHWIntrinsic()->GetAuxiliaryJitType());

if (tree->AsHWIntrinsic()->IsUserCall())
{
copy->AsHWIntrinsic()->SetMethodHandle(this, tree->AsHWIntrinsic()->GetMethodHandle());
}
goto CLONE_MULTIOP_OPERANDS;
#endif
#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)
Expand Down Expand Up @@ -19570,6 +19584,67 @@ void GenTreeMultiOp::InitializeOperands(GenTree** operands, size_t operandCount)
SetOperandCount(operandCount);
}

//------------------------------------------------------------------------
// GenTreeJitIntrinsic::SetMethodHandle: Sets the method handle for an intrinsic
//    so that it can be rewritten back to a user call in a later phase
//
// Arguments:
//    comp         - The compiler instance
//    methodHandle - The method handle representing the fallback handling for the intrinsic
//
// Notes:
//    We need to ensure that the operands are not tracked inline so that we can track the
//    underlying method handle. See the comment in GenTreeJitIntrinsic around why the union
//    of fields exists.
//
//    The node must be a hardware intrinsic that has not already been marked as a user call.
//    The R2R entry point is reset here only when FEATURE_READYTORUN is defined, since the
//    field does not exist otherwise.
//
void GenTreeJitIntrinsic::SetMethodHandle(Compiler* comp, CORINFO_METHOD_HANDLE methodHandle)
{
    assert(OperIsHWIntrinsic() && !IsUserCall());
    gtFlags |= GTF_HW_USER_CALL;

    size_t operandCount = GetOperandCount();

    // Only operand counts that fit in gtInlineOperands need to be relocated; gtMethodHandle
    // (and gtEntryPoint) share storage with gtInlineOperands, so the writes below would
    // otherwise clobber operands held there.
    if ((operandCount != 0) && (operandCount <= ArrLen(gtInlineOperands)))
    {
        // Capture the current operand array before the node is repointed at the new one.
        GenTree** oldOperands = GetOperandArray();
        GenTree** newOperands = comp->getAllocator(CMK_ASTNode).allocate<GenTree*>(operandCount);

        ResetOperandArray(operandCount, comp, newOperands, operandCount);
        assert(GetOperandArray() == newOperands);

        for (size_t i = 0; i < operandCount; i++)
        {
            newOperands[i] = oldOperands[i];
        }
    }

    // This must happen after the operands have been copied out, as it overwrites the
    // storage shared with gtInlineOperands.
    gtMethodHandle = methodHandle;

#if defined(FEATURE_READYTORUN)
    // gtEntryPoint is only declared for FEATURE_READYTORUN builds; start with no entry
    // point so that SetEntryPoint can assert it is only populated once.
    gtEntryPoint = nullptr;
#endif // FEATURE_READYTORUN
}

#if defined(FEATURE_READYTORUN)
//------------------------------------------------------------------------
// GenTreeJitIntrinsic::SetEntryPoint: Records the entry point for an intrinsic
//    that has been marked as a user call, so that it can be rewritten back to
//    a user call in a later phase for R2R scenarios
//
// Arguments:
//    comp       - The compiler instance, passed to the allocation of the stored copy
//    entryPoint - The entry point information required for R2R scenarios
//
// Notes:
//    SetMethodHandle must have been called on this node first; otherwise the
//    write below could overwrite inline operands.
//
void GenTreeJitIntrinsic::SetEntryPoint(Compiler* comp, CORINFO_CONST_LOOKUP entryPoint)
{
    // The node must already be flagged as a user call and must not yet carry an entry point.
    assert(IsUserCall());
    assert(gtEntryPoint == nullptr);

    // Keep a copy of the lookup on the node rather than the by-value argument.
    CORINFO_CONST_LOOKUP* const storedLookup = new (comp, CMK_ASTNode) CORINFO_CONST_LOOKUP(entryPoint);

    gtEntryPoint = storedLookup;
}
#endif // FEATURE_READYTORUN

var_types GenTreeJitIntrinsic::GetAuxiliaryType() const
{
CorInfoType auxiliaryJitType = GetAuxiliaryJitType();
Expand Down Expand Up @@ -27040,7 +27115,7 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const
}
}

return false;
return IsUserCall();
}

//------------------------------------------------------------------------------
Expand Down
52 changes: 51 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ enum GenTreeFlags : unsigned int

#ifdef FEATURE_HW_INTRINSICS
GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
GTF_HW_USER_CALL = 0x20000000, // GT_HWINTRINSIC -- node is implemented via a user call
#endif // FEATURE_HW_INTRINSICS
};

Expand Down Expand Up @@ -6089,6 +6090,15 @@ struct GenTreeMultiOp : public GenTree
}
#endif

// Returns true when this node is a GT_HWINTRINSIC carrying the GTF_HW_USER_CALL flag,
// i.e. a hardware intrinsic that is implemented via a user call. Always false when
// FEATURE_HW_INTRINSICS is not defined.
bool IsUserCall() const
{
#if defined(FEATURE_HW_INTRINSICS)
    if (!OperIs(GT_HWINTRINSIC))
    {
        return false;
    }

    return (gtFlags & GTF_HW_USER_CALL) != 0;
#else
    return false;
#endif
}

GenTree*& Op(size_t index)
{
size_t actualIndex = index - 1;
Expand Down Expand Up @@ -6217,7 +6227,29 @@ class IntrinsicNodeBuilder final
struct GenTreeJitIntrinsic : public GenTreeMultiOp
{
protected:
GenTree* gtInlineOperands[2];
union
{
// We don't have enough space to carry both the inline operands
// and the necessary information required to support rewriting
// the intrinsic back into a user call. As such, we union the
// data instead and use the GTF_HW_USER_CALL flag to indicate
// which fields are valid to access. -- Tracking the fields
// independently causes TREE_NODE_SZ_LARGE to increase and for
// GenTreeJitIntrinsic to become the largest node, which is
// undesirable, so this approach helps keep things pay-for-play.

GenTree* gtInlineOperands[2];

struct
{
CORINFO_METHOD_HANDLE gtMethodHandle;

#if defined(FEATURE_READYTORUN)
// Call target lookup info for method call from a Ready To Run module
CORINFO_CONST_LOOKUP* gtEntryPoint;
#endif // FEATURE_READYTORUN
};
};
regNumberSmall gtOtherReg; // The second register for multi-reg intrinsics.
MultiRegSpillFlags gtSpillFlags; // Spill flags for multi-reg intrinsics.
unsigned char gtAuxiliaryJitType; // For intrinsics than need another type (e.g. Avx2.Gather* or SIMD (by element))
Expand All @@ -6226,6 +6258,24 @@ struct GenTreeJitIntrinsic : public GenTreeMultiOp
NamedIntrinsic gtHWIntrinsicId;

public:
// Returns the method handle stored on this node. Only valid for nodes flagged as a
// user call: gtMethodHandle shares storage with the inline operands, and the flag
// indicates which of the two is valid to access.
CORINFO_METHOD_HANDLE GetMethodHandle() const
{
    assert(IsUserCall());

    const CORINFO_METHOD_HANDLE storedHandle = gtMethodHandle;
    return storedHandle;
}

void SetMethodHandle(Compiler* comp, CORINFO_METHOD_HANDLE methodHandle);

#if defined(FEATURE_READYTORUN)
// Returns a copy of the R2R entry point lookup stored on this node. Only valid for
// nodes flagged as a user call whose entry point has been populated via SetEntryPoint.
CORINFO_CONST_LOOKUP GetEntryPoint() const
{
    assert(IsUserCall());

    // SetMethodHandle leaves gtEntryPoint as nullptr until SetEntryPoint is called,
    // so catch a premature read here rather than dereferencing a null pointer.
    assert(gtEntryPoint != nullptr);

    return *gtEntryPoint;
}

void SetEntryPoint(Compiler* comp, CORINFO_CONST_LOOKUP entryPoint);
#endif // FEATURE_READYTORUN

//-----------------------------------------------------------
// GetRegNumByIdx: Get regNumber of i'th position.
//
Expand Down
9 changes: 8 additions & 1 deletion src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1869,7 +1869,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

if (!indices->IsVectorConst())
{
// TODO-ARM64-CQ: Handling non-constant indices is a bit more complex
assert(sig->numArgs == 2);

op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);

retNode->AsHWIntrinsic()->SetMethodHandle(this, method);
break;
}

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift,
HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, ShiftRightArithmetic, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, ShiftRightLogical, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, Store, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
Expand Down Expand Up @@ -220,7 +220,7 @@ HARDWARE_INTRINSIC(Vector128, op_UnaryPlus,
HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
Expand Down
Loading

0 comments on commit ec5963d

Please sign in to comment.