From 03e838287d32df8256036760b4f7e296144934db Mon Sep 17 00:00:00 2001 From: Micah Snyder Date: Fri, 2 Aug 2024 20:11:11 -0400 Subject: [PATCH] Remove unused opt passes Remove ClamBCChangeMallocArgSize and ClamBCRemoveUndefs. Also removed a commented out call to clambc-remove-pointer-phis. This pass is performed later on. --- clambcc/clambc-compiler.py | 43 ---- libclambcc/ClamBCChangeMallocArgSize.cpp | 177 --------------- libclambcc/ClamBCRemoveUndefs.cpp | 272 ----------------------- 3 files changed, 492 deletions(-) delete mode 100644 libclambcc/ClamBCChangeMallocArgSize.cpp delete mode 100644 libclambcc/ClamBCRemoveUndefs.cpp diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index c7348eca57..27758e5297 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -549,16 +549,6 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: "function(mem2reg)", 'verify', - # Remove 'undef' values. - # - # The optimizer in llvm-8 was replacing unused parameters with 'undef' values in the IR. - # This was causing issues in the writer, not knowing what value to put in the signature. - # The llvm-16 optimizer no longer does this. - # - # TODO: This pass may be removed in a future release. - # 'clambc-remove-undefs', - # 'verify', - # Prevent undefined or poison values from being inserted into the function calls by the 'O3' pass. # This pass should probably be renamed, since the ABI is not really changed. # Undefined values cause issues with the ClamBCWriter, and with ClamAV's runtime. @@ -578,15 +568,6 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: 'clambc-preserve-abis' , 'verify', - # This pass removes pointer phis, and replaces them with an index calculation to get the same offset. - # - # We will call this later, instead of here. - # - # Note: This is only needed for 0.103 on Windows where we're using an older vendored version of LLVM for the runtime. - # This can be removed when 0.103 support is no longer required. - # 'clambc-remove-pointer-phis', - # 'verify', - # Remove calls to smin intrinsics. # Smin intrinsics are not supported by the ClamAV runtime, so this pass creates it's own smin functions, and replaces # calls to intrinsics with calls to the newly created functions. @@ -700,13 +681,6 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: 'clambc-outline-endianness-calls', 'verify', - # Change malloc argument size. - # - # TODO: This was added because the legacy llvm runtime had issues with 32-bit phi nodes being used in calls to malloc. - # It appears that this is no longer necessary. - # 'clambc-change-malloc-arg-size', - # 'verify', - # Extends all integer phi nodes to use 64-bit values. # # TODO: I don't remember what the reason was that I needed to add this. It should be re-evaluated for the next release. @@ -742,15 +716,6 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: OPTIMIZE_LOADS=[ f"--load {SHARED_OBJ_DIR}/libClamBCCommon.so", - # libClamBCRemoveUndefs.so is no longer being run. - # - # This was added because the optimizer in llvm-8 was replacing unused parameters with 'undef' values in the IR. - # This was causing issues in the writer, not knowing what value to put in the signature. - # The llvm-16 optimizer no longer does this. - # - # TODO: This pass may be removed in a future release. - # f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUndefs.so", - f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCPreserveABIs.so", f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUnsupportedICMPIntrinsics.so", f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUSUB.so", @@ -775,14 +740,6 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRebuild.so", f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCTrace.so", f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCOutlineEndiannessCalls.so", - - # libClamBCChangeMallocArgSize.so is no longer being run. - # At one time, the legacy llvm runtime had issues with 32-bit phi nodes being used in calls to malloc. - # It appears as though this change is no longer necessary. - # - # TODO: May be removed in the future. - # f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCChangeMallocArgSize.so", - f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCExtendPHIsTo64Bit.so", f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCConvertIntrinsicsTo32Bit.so", f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCPrepareGEPsForWriter.so", diff --git a/libclambcc/ClamBCChangeMallocArgSize.cpp b/libclambcc/ClamBCChangeMallocArgSize.cpp deleted file mode 100644 index c8e9da5972..0000000000 --- a/libclambcc/ClamBCChangeMallocArgSize.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "clambc.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -using namespace llvm; - -namespace ChangeMallocArgSize -{ -class ChangeMallocArgSize : public PassInfoMixin -{ - protected: - std::vector changeValues; - - Module* pMod = nullptr; - IntegerType* dstType = nullptr; - - void addChangeValue(PHINode* pv) - { - if (llvm::isa(pv)) { - return; - } - - if (changeValues.end() == std::find(changeValues.begin(), changeValues.end(), pv)) { - changeValues.push_back(pv); - } - } - - void findSizes(BasicBlock* pBB) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - CallInst* pCall = llvm::dyn_cast(i); - if (pCall) { - Function* pFunc = pCall->getCalledFunction(); - if (pFunc && ("malloc" == pFunc->getName())) { - Value* pv = pCall->getOperand(0); - if (PHINode* pn = llvm::dyn_cast(pv)) { - addChangeValue(pn); - } - } - } - } - } - - void findSizes(Function* pFunc) - { - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - findSizes(llvm::cast(i)); - } - } - - void findSizes() - { - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - findSizes(llvm::cast(i)); - } - } - - /* Yes, I know there is a "getTerminator" function, but I have come across blocks - * that have more than one branch instruction (I think it is a bug in the runtime), but - * until that is resolved, I want to use this function. - */ - Instruction* findTerminator(BasicBlock* pb) - { - Instruction* inst = nullptr; - for (auto i = pb->begin(), e = pb->end(); i != e; i++) { - inst = llvm::cast(i); - if (llvm::isa(inst) || llvm::isa(inst)) { - break; - } - } - assert(inst && "Impossible, there is always a terminator."); - assert(inst == pb->getTerminator() && "How did this happen"); - - return inst; - } - - PHINode* getNewPHI(PHINode* pn) - { - - PHINode* newPN = PHINode::Create(dstType, pn->getNumIncomingValues(), "ChangeMallocArgSize_", pn); - for (size_t i = 0; i < pn->getNumIncomingValues(); i++) { - Value* pv = pn->getIncomingValue(i); - BasicBlock* pb = pn->getIncomingBlock(i); - Instruction* bTerm = findTerminator(pb); - - Instruction* pNew = CastInst::CreateZExtOrBitCast(pv, dstType, "ChangeMallocArgSize_zext_", bTerm); - newPN->addIncoming(pNew, pb); - } - - return newPN; - } - - void fixBitWidths() - { - - for (size_t i = 0; i < changeValues.size(); i++) { - PHINode* pn = changeValues[i]; - - if (dstType != pn->getType()) { - PHINode* pRep = getNewPHI(pn); - - std::vector insts; - - for (auto i = pn->user_begin(), e = pn->user_end(); i != e; i++) { - Instruction* inst = llvm::cast(*i); - insts.push_back(inst); - } - for (size_t i = 0; i < insts.size(); i++) { - Instruction* inst = insts[i]; - - if (PHINode* pn2 = llvm::dyn_cast(inst)) { - DEBUGERR << *pn2 << "\n"; - assert(0 && "SHOULD NEVER HAPPEN"); - } else { - auto* val = CastInst::CreateTruncOrBitCast(pRep, pn->getType(), "ChangeMallocArgSize_trunc_", inst); - - for (size_t j = 0; j < inst->getNumOperands(); j++) { - if (inst->getOperand(j) == pn) { - inst->setOperand(j, val); - break; - } - } - } - } - - pn->eraseFromParent(); - } - } - } - - public: - ChangeMallocArgSize() - { - } - - virtual PreservedAnalyses run(Module& m, ModuleAnalysisManager& MAM) - { - pMod = &m; - dstType = Type::getInt64Ty(pMod->getContext()); - - findSizes(); - - fixBitWidths(); - - return PreservedAnalyses::none(); - } -}; // end of struct ChangeMallocArgSize -} // namespace ChangeMallocArgSize - -// This part is the new way of registering your pass -extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK -llvmGetPassPluginInfo() -{ - return { - LLVM_PLUGIN_API_VERSION, "ChangeMallocArgSize", "v0.1", - [](PassBuilder& PB) { - PB.registerPipelineParsingCallback( - [](StringRef Name, ModulePassManager& FPM, - ArrayRef) { - if (Name == "clambc-change-malloc-arg-size") { - FPM.addPass(ChangeMallocArgSize::ChangeMallocArgSize()); - return true; - } - return false; - }); - }}; -} diff --git a/libclambcc/ClamBCRemoveUndefs.cpp b/libclambcc/ClamBCRemoveUndefs.cpp deleted file mode 100644 index da10e179c7..0000000000 --- a/libclambcc/ClamBCRemoveUndefs.cpp +++ /dev/null @@ -1,272 +0,0 @@ - -#include "clambc.h" -#include "ClamBCUtilities.h" - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -using namespace llvm; - -/* TODO: THIS APPEARS TO NO LONGER BE NEEDED. */ - -namespace -{ -/* - * This pass requires -mem2reg before it (TEMPORARILY) - * and must be run before -O3. - * - * This will remove storing parameters in stack variables and loading from there. - * - * ; Function Attrs: noinline nounwind uwtable - define dso_local i32 @decrypt_config(i32 %config_location, %struct._state* %state, i32 %sizeof_state) #0 { - entry: - ... - %state.addr = alloca %struct._state*, align 8 - %sizeof_state.addr = alloca i32, align 4 - ... - store %struct._state* %state, %struct._state** %state.addr, align 8 - store i32 %sizeof_state, i32* %sizeof_state.addr, align 4 - */ -struct ClamBCRemoveUndefs : public PassInfoMixin { - protected: - llvm::Module *pMod = nullptr; - std::map aborts; - - bool bChanged = false; - - std::vector delLst; - - BasicBlock *getAbortBB(unsigned MDDbgKind, BasicBlock *BB) - { - Function *pFunc = BB->getParent(); - - auto iter = aborts.find(pFunc); - if (aborts.end() != iter) { - return iter->second; - } - - FunctionType *abrtTy = FunctionType::get( - Type::getVoidTy(BB->getContext()), false); - FunctionType *rterrTy = FunctionType::get( - Type::getInt32Ty(BB->getContext()), - {Type::getInt32Ty(BB->getContext())}, false); - FunctionCallee func_abort = BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); - FunctionCallee func_rterr = - BB->getParent()->getParent()->getOrInsertFunction("bytecode_rt_error", rterrTy); - BasicBlock *abort = BasicBlock::Create(BB->getContext(), "rterr.trig", BB->getParent()); - Constant *PN = ConstantInt::get(Type::getInt32Ty(BB->getContext()), 99); - if (MDDbgKind) { - CallInst *RtErrCall = CallInst::Create(func_rterr, PN, "", abort); - RtErrCall->setCallingConv(CallingConv::C); - RtErrCall->setTailCall(true); - RtErrCall->setDoesNotThrow(); - } - CallInst *AbrtC = CallInst::Create(func_abort, "", abort); - AbrtC->setCallingConv(CallingConv::C); - AbrtC->setTailCall(true); - AbrtC->setDoesNotReturn(); - AbrtC->setDoesNotThrow(); - new UnreachableInst(BB->getContext(), abort); - - aborts[pFunc] = abort; - - return abort; - } - - virtual Type *getTargetType(Value *v1, Value *v2) - { - - IntegerType *v1t = llvm::dyn_cast(v1->getType()); - IntegerType *v2t = llvm::dyn_cast(v2->getType()); - - assert((v1t and v2t) and "This function is only for integer types."); - - if (v1t->getBitWidth() > v2t->getBitWidth()) { - return v1t; - } - - return v2t; - } - - virtual void insertChecks(GetElementPtrInst *pgepi, Value *vsize) - { - - std::set insts; - std::set globs; - getDependentValues(pgepi, insts, globs); - - /*Make sure that a pointer is actually accessed (loaded or written to) - * before adding runtime checks.*/ - bool bPtrUsed = false; - for (auto i : insts) { - if (LoadInst *li = llvm::dyn_cast(i)) { - if (isSamePointer(pgepi, li->getPointerOperand())) { - bPtrUsed = true; - break; - } - } else if (StoreInst *si = llvm::dyn_cast(i)) { - if (isSamePointer(pgepi, si->getPointerOperand())) { - bPtrUsed = true; - break; - } - } - } - - if (bPtrUsed) { - - Value *pIdx = pgepi->getOperand(pgepi->getNumOperands() - 1); - - BasicBlock *old = pgepi->getParent(); - BasicBlock *abortBB = getAbortBB(0, old); - BasicBlock *pSplit = old->splitBasicBlock(pgepi, "ClamBCRemoveUndefs_"); - - Instruction *term = old->getTerminator(); - - Type *pTargetType = getTargetType(pIdx, vsize); - if (pIdx->getType() != pTargetType) { - pIdx = CastInst::CreateZExtOrBitCast(pIdx, pTargetType, "ClamBCRemoveUndefs_", term); - } - - if (vsize->getType() != pTargetType) { - vsize = CastInst::CreateZExtOrBitCast(vsize, pTargetType, "ClamBCRemoveUndefs_", term); - } - - Value *cond = new ICmpInst(term, ICmpInst::ICMP_UGE, pIdx, vsize); - BranchInst::Create(abortBB, pSplit, cond, term); - - delLst.push_back(term); - bChanged = true; - } - } - - /*Returns true if ptr1 and ptr2 reference the same pointer.*/ - virtual bool isSamePointer(Value *ptr1, Value *ptr2, std::set &visited) - { - - if (visited.end() != std::find(visited.begin(), visited.end(), ptr1)) { - return false; - } - visited.insert(ptr1); - - if (ptr1 == ptr2) { - return true; - } - - if (User *pu = llvm::dyn_cast(ptr1)) { - - for (size_t i = 0; i < pu->getNumOperands(); i++) { - if (isSamePointer(pu->getOperand(i), ptr2, visited)) { - return true; - } - } - } - return false; - } - - virtual bool isSamePointer(Value *ptr1, Value *ptr2) - { - std::set visited; - return isSamePointer(ptr1, ptr2, visited); - } - - virtual void insertChecks(Value *ptr, Value *size) - { - - std::set insts; - std::set globs; - getDependentValues(ptr, insts, globs); - - for (auto i : insts) { - if (GetElementPtrInst *pgepi = llvm::dyn_cast(i)) { - if (isSamePointer(pgepi->getPointerOperand(), ptr)) { - insertChecks(pgepi, size); - } - } - } - } - - virtual void processFunction(Function *pFunc) - { - for (auto i = pFunc->arg_begin(), e = pFunc->arg_end(); i != e; i++) { - Argument *pArg = llvm::cast(i); - - Type *pType = pArg->getType(); - if (not pType->isPointerTy()) { - continue; - } - - i++; - if (i == pFunc->arg_end()) { - break; - } - - Argument *pArgSize = llvm::cast(i); - - Type *pArgSizeType = pArgSize->getType(); - assert(pArgSizeType->isIntegerTy() && "This needs to be the size of the pointer"); - - insertChecks(pArg, pArgSize); - } - } - - public: - ClamBCRemoveUndefs() {} - - virtual ~ClamBCRemoveUndefs() {} - - PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) - { - /* TODO: This no longer appears to be needed. Remove it? */ - return PreservedAnalyses::all(); - - pMod = &m; - - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function *pFunc = llvm::cast(i); - if (pFunc->isDeclaration()) { - continue; - } - - processFunction(pFunc); - } - - for (size_t i = 0; i < delLst.size(); i++) { - delLst[i]->eraseFromParent(); - } - - if (bChanged) { - return PreservedAnalyses::none(); - } - return PreservedAnalyses::all(); - } -}; // end of struct ClamBCRemoveUndefs - -} // end of anonymous namespace - -// This part is the new way of registering your pass -extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK -llvmGetPassPluginInfo() -{ - return { - LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUndefs", "v0.1", - [](PassBuilder &PB) { - PB.registerPipelineParsingCallback( - [](StringRef Name, ModulePassManager &FPM, - ArrayRef) { - if (Name == "clambc-remove-undefs") { - FPM.addPass(ClamBCRemoveUndefs()); - return true; - } - return false; - }); - }}; -}