Skip to content

Commit

Permalink
Merge branch 'CLAM-1496-LLVM-sigmanager_source.c' into 'main'
Browse files Browse the repository at this point in the history
Fix parameter verification and PHI node not at top of block errors

See merge request clamav/clamav-bytecode-compiler!21
  • Loading branch information
ragusaa committed Jun 17, 2021
2 parents cd4062f + dc5d30b commit ce8e733
Show file tree
Hide file tree
Showing 8 changed files with 738 additions and 407 deletions.
148 changes: 84 additions & 64 deletions clambcc/clambc-compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,72 +435,92 @@ def optimize(inFile, outFile, sigFile, inputSourceFile, standardCompiler):
internalizeAPIList += ",main"

#TODO: Modify ClamBCRemoveUndefs to not require mem2reg to be run before it.
cmd = f'opt-{CLANG_VERSION} \
-S \
-verify-each \
-load "{SHARED_OBJ_FILE}" \
{inFile} \
-o {outFile} \
-mem2reg \
-clambc-remove-undefs \
-O3 \
-clambc-remove-pointer-phis \
-dce \
-disable-loop-vectorization \
-disable-slp-vectorization \
-globaldce \
-strip-dead-prototypes \
-constmerge \
-mem2reg \
-always-inline \
-globalopt \
-lowerswitch \
-lowerinvoke \
-globalopt \
-simplifycfg \
-indvars \
-constprop \
-clambc-lowering-notfinal \
-lowerswitch \
-clambc-verifier \
-clambc-lowering-notfinal \
-dce \
-simplifycfg \
-mem2reg \
-clambc-lcompiler \
-internalize -internalize-public-api-list="{internalizeAPIList}" \
-globaldce \
-instcombine \
-clambc-rebuild \
-verify \
-simplifycfg \
-dce \
-lowerswitch \
-clambc-verifier \
-verify \
-strip-debug-declare \
-clambc-gepsplitter-placeholder \
-clambc-lowering-final \
-clambc-trace \
-dce \
-clambc-module \
-verify \
-globalopt \
-remove-selects \
-clambc-outline-endianness-calls \
-clambc-change-malloc-arg-size \
-globalopt \
-clambc-prepare-geps-for-writer \
-globalopt \
-clambc-convert-intrinsics \
-clambc-writer \
-clambc-writer-input-source={inputSourceFile} \
-clambc-sigfile={sigFile} \
'
cmd = (f'opt-{CLANG_VERSION}'
f' -S'
f' -verify-each'
f' -load "{SHARED_OBJ_FILE}"'
f' {inFile}'
f' -o {outFile}'
f' -mem2reg'
f' -clambc-remove-undefs' #add pointer bounds checking.
f' -clambc-preserve-abis' #add fake function calls that use all of
#the arguments so that O3 doesn't change
#the argument lists
f' -O3'
f' -clambc-preserve-abis' #remove fake function calls because O3 has already run
f' -clambc-remove-pointer-phis'
f' -dce'
f' -disable-loop-vectorization'
f' -disable-slp-vectorization'
f' -globaldce'
f' -strip-dead-prototypes'
f' -constmerge'
f' -mem2reg'
f' -always-inline'
f' -globalopt'
f' -lowerswitch'
f' -lowerinvoke'
f' -globalopt'
f' -simplifycfg'
f' -indvars'
f' -constprop'
f' -clambc-lowering-notfinal' # perform lowering pass
f' -lowerswitch'
f' -clambc-verifier'
f' -clambc-lowering-notfinal' # perform lowering pass
f' -dce'
f' -simplifycfg'
f' -mem2reg'
f' -clambc-lcompiler' #compile the logical_trigger function to a
#logical signature.
f' -internalize -internalize-public-api-list="{internalizeAPIList}"'
f' -globaldce'
f' -instcombine'
f' -clambc-rebuild'
f' -verify'
f' -simplifycfg'
f' -dce'
f' -lowerswitch'
f' -clambc-verifier'
f' -verify'
f' -strip-debug-declare'
f' -clambc-gepsplitter-placeholder'
f' -clambc-lowering-final'
f' -clambc-trace'
f' -dce'
f' -clambc-module'
f' -verify'
f' -globalopt'
f' -remove-selects'
f' -clambc-outline-endianness-calls' #outline the endianness calls
#because otherwise the call
#is replaced with a constant
#that is based on where the
#signature was compiled, and
#won't always be accurate.
f' -clambc-change-malloc-arg-size' #make sure we always use the
#64-bit malloc.
f' -globalopt'
f' -clambc-extend-phis-to-64bit' #make all integer phi nodes 64-bit
#because the llvm runtime inserts a
#cast after phi nodes without
#verifying that there is not
#another phi node after it.
f' -clambc-prepare-geps-for-writer' #format gep indexes to not
#have more than 2, because
#otherwise the writer gets
#unhappy.
f' -globalopt'
f' -clambc-convert-intrinsics' #convert all memset intrinsics to
#the 32-bit instead of the 64-bit
#intrinsic
f' -clambc-writer' #write the bytecode
f' -clambc-writer-input-source={inputSourceFile}'
f' -clambc-sigfile={sigFile}'
)

if standardCompiler:
cmd += f"-clambc-standard-compiler \
"
cmd += f" -clambc-standard-compiler"
return run(cmd)


Expand Down
2 changes: 2 additions & 0 deletions libclambcc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ target_sources(clambcc_obj
ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp
ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp
ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp
ClamBCPreserveABIs/ClamBCPreserveABIs.cpp
ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp
)

target_include_directories(clambcc_obj
Expand Down
150 changes: 150 additions & 0 deletions libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Compile LLVM bytecode to ClamAV bytecode.
*
* Copyright (C) 2009-2010 Sourcefire, Inc.
*
* Authors: Andy Ragusa
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "../Common/bytecode_api.h"
#include "clambc.h"
#include "ClamBCModule.h"
#include "ClamBCAnalyzer/ClamBCAnalyzer.h"
#include "Common/ClamBCUtilities.h"

#include <llvm/Support/DataTypes.h>
//#include "ClamBCTargetMachine.h"
#include <llvm/ADT/STLExtras.h>
#include <llvm/Analysis/ConstantFolding.h>
#include <llvm/IR/DebugInfo.h>
#include <llvm/IR/Dominators.h>
#include <llvm/Analysis/LoopInfo.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Analysis/ValueTracking.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/CallingConv.h>
#include <llvm/CodeGen/IntrinsicLowering.h>
//#include "llvm/Config/config.h"
#include <llvm/IR/Constants.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Metadata.h>
#include <llvm/IR/Module.h>
#include <llvm/Pass.h>

#include <llvm/Analysis/ValueTracking.h>

using namespace llvm;

class ClamBCExtendPHIsTo64Bit : public ModulePass
{
protected:
llvm::Module *pMod = nullptr;

virtual void convertPHIs(Function *pFunc)
{
std::vector<PHINode *> phis;
for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) {
BasicBlock *bb = llvm::cast<BasicBlock>(i);
for (auto bi = bb->begin(), be = bb->end(); bi != be; bi++) {
if (PHINode *phi = llvm::dyn_cast<PHINode>(bi)) {
phis.push_back(phi);
}
}
}

for (size_t i = 0; i < phis.size(); i++) {
convertPHI(phis[i]);
}
}

virtual void convertPHI(PHINode *pn)
{
IntegerType *dstType = IntegerType::get(pMod->getContext(), 64);
IntegerType *origType = llvm::dyn_cast<IntegerType>(pn->getType());
if ((dstType == origType) || (nullptr == origType)) {
return;
}

PHINode *newNode = PHINode::Create(dstType, pn->getNumIncomingValues(), "ClamBCConvertPHINodes_", pn);
for (size_t i = 0; i < pn->getNumIncomingValues(); i++) {
Value *incomingValue = pn->getIncomingValue(i);
BasicBlock *incomingBlock = pn->getIncomingBlock(i);

if (ConstantInt *ci = llvm::dyn_cast<ConstantInt>(incomingValue)) {
Constant *newCi = ConstantInt::get(dstType, ci->getLimitedValue());
newNode->addIncoming(newCi, incomingBlock);
} else {
Instruction *insPt = llvm::cast<Instruction>(--(incomingBlock->end()));
Instruction *inst = CastInst::CreateIntegerCast(pn->getIncomingValue(i), dstType, true, "ClamBCConvertPHINodes_", insPt);

newNode->addIncoming(inst, incomingBlock);
}
}
Instruction *insPt = nullptr;
for (auto i = pn->getParent()->begin(), e = pn->getParent()->end(); i != e; i++) {
if (llvm::isa<PHINode>(i)) {
continue;
}

//Not allowed in bytecode sigs, but no reason not to support it.
if (llvm::isa<LandingPadInst>(i)) {
continue;
}

insPt = llvm::cast<Instruction>(i);
break;
}

Instruction *cast = CastInst::CreateIntegerCast(newNode, origType, true, "ClamBCConvertPHINodes_", insPt);
pn->replaceAllUsesWith(cast);
pn->eraseFromParent();
}

public:
static char ID;

explicit ClamBCExtendPHIsTo64Bit()
: ModulePass(ID) {}

virtual ~ClamBCExtendPHIsTo64Bit() {}

virtual bool runOnModule(Module &m)
{

pMod = &m;

for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) {
Function *pFunc = llvm::cast<Function>(i);
convertPHIs(pFunc);
}

return true;
}
};

// Definition of the static pass identifier that the constructor passes to
// the ModulePass base class.
char ClamBCExtendPHIsTo64Bit::ID = 0;
// Static registration object for the pass.  The first string is the option
// name under which the compiler driver requests the pass
// (-clambc-extend-phis-to-64bit); the second is its descriptive name.
static RegisterPass<ClamBCExtendPHIsTo64Bit> X("clambc-extend-phis-to-64bit", "ClamBCExtendPHIsTo64Bit Pass",
false /* Only looks at CFG */,
false /* Analysis Pass */);

// Factory function: allocates a fresh ClamBCExtendPHIsTo64Bit pass with new
// and hands it back through the generic llvm::ModulePass interface.
llvm::ModulePass *createClamBCExtendPHIsTo64Bit()
{
    llvm::ModulePass *pPass = new ClamBCExtendPHIsTo64Bit();
    return pPass;
}
Loading

0 comments on commit ce8e733

Please sign in to comment.