From 0c8827d365d090294d0203c9c820f4e46577b350 Mon Sep 17 00:00:00 2001 From: Andy Ragusa Date: Thu, 5 Jan 2023 12:01:39 -0800 Subject: [PATCH] Remove unused global __Copyright Unused globals are causing warnings in the runtime due to the list indices getting out of sync in the list of Globals and ConstantExprs. This PR stores the copyright statement as metadata, so there is no unused global inserted into the runtime. --- clambcc/clambc-compiler.py | 3 +- libclambcc/CMakeLists.txt | 1 + .../ClamBCCopyrightHandler.cpp | 115 ++++++++++++++++++ libclambcc/ClamBCWriter/ClamBCWriter.cpp | 14 ++- 4 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 libclambcc/ClamBCCopyrightHandler/ClamBCCopyrightHandler.cpp diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index 7bf72ab6a8..eade62cc31 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -493,7 +493,7 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inputSourceFile: str, standardCompiler: bool) -> int: - internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata,__Copyright" + internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata" if standardCompiler: internalizeAPIList += ",main" @@ -509,6 +509,7 @@ def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inpu f' -clambc-preserve-abis' #add fake function calls that use all of #the arguments so that O3 doesn't change #the argument lists + f' -clambc-copyright-handler' f' -O3' f' -clambc-preserve-abis' #remove fake function calls because O3 has already run f' -clambc-remove-pointer-phis' diff --git a/libclambcc/CMakeLists.txt 
b/libclambcc/CMakeLists.txt index 4812a8f87c..620c679f55 100644 --- a/libclambcc/CMakeLists.txt +++ b/libclambcc/CMakeLists.txt @@ -9,6 +9,7 @@ target_sources(clambcc_obj ClamBCLowering/ClamBCLowering.cpp ClamBCVerifier/ClamBCVerifier.cpp ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp + ClamBCCopyrightHandler/ClamBCCopyrightHandler.cpp ClamBCRebuild/ClamBCRebuild.cpp ClamBCTrace/ClamBCTrace.cpp ClamBCModule/ClamBCModule.cpp diff --git a/libclambcc/ClamBCCopyrightHandler/ClamBCCopyrightHandler.cpp b/libclambcc/ClamBCCopyrightHandler/ClamBCCopyrightHandler.cpp new file mode 100644 index 0000000000..4c182e72e6 --- /dev/null +++ b/libclambcc/ClamBCCopyrightHandler/ClamBCCopyrightHandler.cpp @@ -0,0 +1,115 @@ +/* + * Store the ClamAV bytecode copyright string as LLVM module metadata. + * + * Copyright (C) 2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#include "ClamBCModule.h" +#include +#include "../Common/bytecode_api.h" +#include "clambc.h" +#include "ClamBCDiagnostics.h" +#include "ClamBCModule.h" +#include "ClamBCCommon.h" +#include "ClamBCUtilities.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/ConstantFolding.h" +#include +#include "llvm/Analysis/ValueTracking.h" +#include +#include +#include +#include +//#include +#include +#include +#include +#include "llvm/Support/Debug.h" +#include +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +#include +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/IPO.h" +#include +//#include +#include +#include + +#define DEBUG_TYPE "copyrighthandler" + +using namespace llvm; + +namespace +{ +/* Module pass that copies the string initializer of the "__Copyright" global into the "clambc.copyright" named metadata node. */ +class ClamBCCopyrightHandler : public ModulePass +{ + public: + static char ID; + ClamBCCopyrightHandler() + : ModulePass(ID) {} + + virtual bool runOnModule(Module &M); + + private: + llvm::Module *pMod; /* module being processed; assigned at the top of runOnModule */ +}; + +char ClamBCCopyrightHandler::ID = 0; +RegisterPass X("clambc-copyright-handler", /* option name under which the pass is registered */ + "ClamAV Copyright Handler"); +/* Looks up the "__Copyright" global. When it has a definitive initializer that is a non-empty constant string, the string is appended as an MDString operand to the "clambc.copyright" named metadata. Returns true only when metadata was added; the global itself is not erased here. */ +bool ClamBCCopyrightHandler::runOnModule(Module &M) +{ + pMod = &M; + bool bRet = false; + + GlobalVariable *gCopyright = pMod->getGlobalVariable("__Copyright"); + std::string copyright; + if (gCopyright && gCopyright->hasDefinitiveInitializer()) { + Constant *C = gCopyright->getInitializer(); + StringRef c; + if (!getConstantStringInfo(C, c)) { /* initializer could not be read as a constant string */ + ClamBCStop("Failed to extract copyright string\n", pMod); /* NOTE(review): presumably aborts compilation - confirm; otherwise the empty string falls through */ + } + copyright = c.str(); + } + + if (copyright.length()) { /* a missing or empty string writes no metadata */ + NamedMDNode *Node = M.getOrInsertNamedMetadata("clambc.copyright"); + MDString *S = MDString::get(M.getContext(), llvm::StringRef(copyright)); + MDNode *N = MDNode::get(M.getContext(), S); + Node->addOperand(N); /* appended, so a second run of the pass would add a second operand */ + bRet = true; + } + + return bRet; +} + +} // namespace +const PassInfo *const ClamBCCopyrightHandlerID = &X; + 
+llvm::ModulePass *createClamBCCopyrightHandler() /* factory: returns a newly allocated ClamBCCopyrightHandler; NOTE(review): ownership presumably passes to the caller / pass manager - confirm */ +{ + return new ClamBCCopyrightHandler(); +} diff --git a/libclambcc/ClamBCWriter/ClamBCWriter.cpp b/libclambcc/ClamBCWriter/ClamBCWriter.cpp index e1f60a4fba..30a4a0bfa3 100644 --- a/libclambcc/ClamBCWriter/ClamBCWriter.cpp +++ b/libclambcc/ClamBCWriter/ClamBCWriter.cpp @@ -461,6 +461,7 @@ class ClamBCOutputWriter printNumber(Out, 0, false); continue; } + Constant *pConst = llvm::cast(*I); // type of constant uint16_t id = pAnalyzer->getTypeID((*I)->getType()); @@ -540,9 +541,11 @@ class ClamBCOutputWriter printModuleHeader(*pMod, pAnalyzer, maxLineLength + 1); OutReal << Out.str(); - //MemoryBuffer *MB = nullptr; - const char *start = NULL; - std::string copyright = pAnalyzer->getCopyright(); + const char *start = NULL; + + NamedMDNode *copyrightNode = pMod->getNamedMetadata("clambc.copyright"); + std::string copyright = copyrightNode ? cast(copyrightNode->getOperand(0)->getOperand(0))->getString() : ""; /* NOTE(review): reads operand 0 of the node and of its first MDNode without checking operand counts or the operand kind - confirm the node is always well-formed or guard with getNumOperands() */ + if (copyright.length()) { start = copyright.c_str(); } else { @@ -593,6 +596,10 @@ class ClamBCOutputWriter linelength = 0; } } while (c); + //OutReal doesn't appear to add the last newline unless there is something + //after it, so there is no newline at the end of the source code printout + //without this. + OutReal << "\n"; } void dumpTypes(llvm::raw_ostream &OS) @@ -743,7 +750,6 @@ class ClamBCWriter : public ModulePass, public InstVisitor bool runOnModule(Module &m) { - pMod = &m; pAnalyzer = &getAnalysis(); pOutputWriter = ClamBCOutputWriter::createClamBCOutputWriter(outFile, pMod, pAnalyzer);