Skip to content

Commit

Permalink
Fix parameter verification and PHI node not at top of block errors
Browse files Browse the repository at this point in the history
O3 removes unused arguments for internal (static) functions. In cases
where the arguments are not removed, it replaces the calling argument
with 'undef' values in the call instruction.  Additionally, our llvm
runtime adds ZExt instructions for all phi nodes to i64 when they are
not already 64-bit.  This commit fixes those issues by adding fake
function calls that use all parameters before O3 is run, and removing
them after O3 is run.  It addresses the PHI node issue by making all of
them 64-bit.  It also has clang-format run across the source.
  • Loading branch information
ragusaa committed Jun 16, 2021
1 parent cd4062f commit dc5d30b
Show file tree
Hide file tree
Showing 8 changed files with 738 additions and 407 deletions.
148 changes: 84 additions & 64 deletions clambcc/clambc-compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,72 +435,92 @@ def optimize(inFile, outFile, sigFile, inputSourceFile, standardCompiler):
internalizeAPIList += ",main"

#TODO: Modify ClamBCRemoveUndefs to not require mem2reg to be run before it.
cmd = f'opt-{CLANG_VERSION} \
-S \
-verify-each \
-load "{SHARED_OBJ_FILE}" \
{inFile} \
-o {outFile} \
-mem2reg \
-clambc-remove-undefs \
-O3 \
-clambc-remove-pointer-phis \
-dce \
-disable-loop-vectorization \
-disable-slp-vectorization \
-globaldce \
-strip-dead-prototypes \
-constmerge \
-mem2reg \
-always-inline \
-globalopt \
-lowerswitch \
-lowerinvoke \
-globalopt \
-simplifycfg \
-indvars \
-constprop \
-clambc-lowering-notfinal \
-lowerswitch \
-clambc-verifier \
-clambc-lowering-notfinal \
-dce \
-simplifycfg \
-mem2reg \
-clambc-lcompiler \
-internalize -internalize-public-api-list="{internalizeAPIList}" \
-globaldce \
-instcombine \
-clambc-rebuild \
-verify \
-simplifycfg \
-dce \
-lowerswitch \
-clambc-verifier \
-verify \
-strip-debug-declare \
-clambc-gepsplitter-placeholder \
-clambc-lowering-final \
-clambc-trace \
-dce \
-clambc-module \
-verify \
-globalopt \
-remove-selects \
-clambc-outline-endianness-calls \
-clambc-change-malloc-arg-size \
-globalopt \
-clambc-prepare-geps-for-writer \
-globalopt \
-clambc-convert-intrinsics \
-clambc-writer \
-clambc-writer-input-source={inputSourceFile} \
-clambc-sigfile={sigFile} \
'
cmd = (f'opt-{CLANG_VERSION}'
f' -S'
f' -verify-each'
f' -load "{SHARED_OBJ_FILE}"'
f' {inFile}'
f' -o {outFile}'
f' -mem2reg'
f' -clambc-remove-undefs' #add pointer bounds checking.
f' -clambc-preserve-abis' #add fake function calls that use all of
#the arguments so that O3 doesn't change
#the argument lists
f' -O3'
f' -clambc-preserve-abis' #remove fake function calls because O3 has already run
f' -clambc-remove-pointer-phis'
f' -dce'
f' -disable-loop-vectorization'
f' -disable-slp-vectorization'
f' -globaldce'
f' -strip-dead-prototypes'
f' -constmerge'
f' -mem2reg'
f' -always-inline'
f' -globalopt'
f' -lowerswitch'
f' -lowerinvoke'
f' -globalopt'
f' -simplifycfg'
f' -indvars'
f' -constprop'
f' -clambc-lowering-notfinal' # perform lowering pass
f' -lowerswitch'
f' -clambc-verifier'
f' -clambc-lowering-notfinal' # perform lowering pass
f' -dce'
f' -simplifycfg'
f' -mem2reg'
f' -clambc-lcompiler' #compile the logical_trigger function to a
#logical signature.
f' -internalize -internalize-public-api-list="{internalizeAPIList}"'
f' -globaldce'
f' -instcombine'
f' -clambc-rebuild'
f' -verify'
f' -simplifycfg'
f' -dce'
f' -lowerswitch'
f' -clambc-verifier'
f' -verify'
f' -strip-debug-declare'
f' -clambc-gepsplitter-placeholder'
f' -clambc-lowering-final'
f' -clambc-trace'
f' -dce'
f' -clambc-module'
f' -verify'
f' -globalopt'
f' -remove-selects'
f' -clambc-outline-endianness-calls' #outline the endianness calls
#because otherwise the call
#is replaced with a constant
#that is based on where the
#signature was compiled, and
#won't always be accurate.
f' -clambc-change-malloc-arg-size' #make sure we always use the
#64-bit malloc.
f' -globalopt'
f' -clambc-extend-phis-to-64bit' #make all integer phi nodes 64-bit
#because the llvm runtime inserts a
#cast after phi nodes without
#verifying that there is not
#another phi node after it.
f' -clambc-prepare-geps-for-writer' #format gep indexes to not
#have more than 2, because
#otherwise the writer gets
#unhappy.
f' -globalopt'
f' -clambc-convert-intrinsics' #convert all memset intrinsics to
#the 32-bit instead of the 64-bit
#intrinsic
f' -clambc-writer' #write the bytecode
f' -clambc-writer-input-source={inputSourceFile}'
f' -clambc-sigfile={sigFile}'
)

if standardCompiler:
cmd += f"-clambc-standard-compiler \
"
cmd += f" -clambc-standard-compiler"
return run(cmd)


Expand Down
2 changes: 2 additions & 0 deletions libclambcc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ target_sources(clambcc_obj
ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp
ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp
ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp
ClamBCPreserveABIs/ClamBCPreserveABIs.cpp
ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp
)

target_include_directories(clambcc_obj
Expand Down
150 changes: 150 additions & 0 deletions libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Compile LLVM bytecode to ClamAV bytecode.
*
* Copyright (C) 2009-2010 Sourcefire, Inc.
*
* Authors: Andy Ragusa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "../Common/bytecode_api.h"
#include "clambc.h"
#include "ClamBCModule.h"
#include "ClamBCAnalyzer/ClamBCAnalyzer.h"
#include "Common/ClamBCUtilities.h"

#include <llvm/Support/DataTypes.h>
//#include "ClamBCTargetMachine.h"
#include <llvm/ADT/STLExtras.h>
#include <llvm/Analysis/ConstantFolding.h>
#include <llvm/IR/DebugInfo.h>
#include <llvm/IR/Dominators.h>
#include <llvm/Analysis/LoopInfo.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Analysis/ValueTracking.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/CallingConv.h>
#include <llvm/CodeGen/IntrinsicLowering.h>
//#include "llvm/Config/config.h"
#include <llvm/IR/Constants.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Metadata.h>
#include <llvm/IR/Module.h>
#include <llvm/Pass.h>

#include <llvm/Analysis/ValueTracking.h>

using namespace llvm;

class ClamBCExtendPHIsTo64Bit : public ModulePass
{
protected:
llvm::Module *pMod = nullptr;

virtual void convertPHIs(Function *pFunc)
{
std::vector<PHINode *> phis;
for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) {
BasicBlock *bb = llvm::cast<BasicBlock>(i);
for (auto bi = bb->begin(), be = bb->end(); bi != be; bi++) {
if (PHINode *phi = llvm::dyn_cast<PHINode>(bi)) {
phis.push_back(phi);
}
}
}

for (size_t i = 0; i < phis.size(); i++) {
convertPHI(phis[i]);
}
}

virtual void convertPHI(PHINode *pn)
{
IntegerType *dstType = IntegerType::get(pMod->getContext(), 64);
IntegerType *origType = llvm::dyn_cast<IntegerType>(pn->getType());
if ((dstType == origType) || (nullptr == origType)) {
return;
}

PHINode *newNode = PHINode::Create(dstType, pn->getNumIncomingValues(), "ClamBCConvertPHINodes_", pn);
for (size_t i = 0; i < pn->getNumIncomingValues(); i++) {
Value *incomingValue = pn->getIncomingValue(i);
BasicBlock *incomingBlock = pn->getIncomingBlock(i);

if (ConstantInt *ci = llvm::dyn_cast<ConstantInt>(incomingValue)) {
Constant *newCi = ConstantInt::get(dstType, ci->getLimitedValue());
newNode->addIncoming(newCi, incomingBlock);
} else {
Instruction *insPt = llvm::cast<Instruction>(--(incomingBlock->end()));
Instruction *inst = CastInst::CreateIntegerCast(pn->getIncomingValue(i), dstType, true, "ClamBCConvertPHINodes_", insPt);

newNode->addIncoming(inst, incomingBlock);
}
}
Instruction *insPt = nullptr;
for (auto i = pn->getParent()->begin(), e = pn->getParent()->end(); i != e; i++) {
if (llvm::isa<PHINode>(i)) {
continue;
}

//Not allowed in bytecode sigs, but no reason not to support it.
if (llvm::isa<LandingPadInst>(i)) {
continue;
}

insPt = llvm::cast<Instruction>(i);
break;
}

Instruction *cast = CastInst::CreateIntegerCast(newNode, origType, true, "ClamBCConvertPHINodes_", insPt);
pn->replaceAllUsesWith(cast);
pn->eraseFromParent();
}

public:
static char ID;

explicit ClamBCExtendPHIsTo64Bit()
: ModulePass(ID) {}

virtual ~ClamBCExtendPHIsTo64Bit() {}

virtual bool runOnModule(Module &m)
{

pMod = &m;

for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) {
Function *pFunc = llvm::cast<Function>(i);
convertPHIs(pFunc);
}

return true;
}
};

// Definition of the static pass identifier declared in the class; it is
// handed to the ModulePass constructor.
char ClamBCExtendPHIsTo64Bit::ID = 0;
// Registers the pass under the name "clambc-extend-phis-to-64bit" with the
// description "ClamBCExtendPHIsTo64Bit Pass". The two trailing flags mark it
// as neither CFG-only nor an analysis pass.
static RegisterPass<ClamBCExtendPHIsTo64Bit> X("clambc-extend-phis-to-64bit", "ClamBCExtendPHIsTo64Bit Pass",
false /* Only looks at CFG */,
false /* Analysis Pass */);

/// Factory for the PHI-widening pass: returns a freshly heap-allocated
/// ClamBCExtendPHIsTo64Bit instance as a generic ModulePass pointer.
llvm::ModulePass *createClamBCExtendPHIsTo64Bit()
{
    llvm::ModulePass *pass = new ClamBCExtendPHIsTo64Bit();
    return pass;
}
Loading

0 comments on commit dc5d30b

Please sign in to comment.