From 725f1a1b5923daf0d58041678bd4bf9932d98843 Mon Sep 17 00:00:00 2001 From: AkshayK Date: Thu, 2 Jan 2025 10:43:37 -0500 Subject: [PATCH] codegen: refactor and cleanup --- CMakeLists.txt | 1 - include/patchestry/AST/Codegen.hpp | 10 +- include/patchestry/AST/FunctionBuilder.hpp | 18 +- include/patchestry/AST/OperationBuilder.hpp | 8 +- include/patchestry/AST/TypeBuilder.hpp | 4 +- .../patchestry/Dialect/Pcode/Deserialize.hpp | 18 +- include/patchestry/Dialect/Pcode/Json.hpp | 84 ------ include/patchestry/Dialect/Pcode/Pcode.hpp | 105 ------- include/patchestry/Dialect/Pcode/PcodeDef.h | 77 ----- include/patchestry/Dialect/Pcode/PcodeOps.td | 2 +- include/patchestry/Ghidra/PcodeTypes.hpp | 2 +- lib/patchestry/AST/ASTConsumer.cpp | 3 +- lib/patchestry/AST/Codegen.cpp | 66 ++++- lib/patchestry/AST/FunctionBuilder.cpp | 61 +++- lib/patchestry/AST/OperationBuilder.cpp | 52 ++-- lib/patchestry/AST/OperationStmt.cpp | 217 +++++++------- lib/patchestry/AST/TypeBuilder.cpp | 2 +- lib/patchestry/Dialect/Pcode/Deserialize.cpp | 266 ++---------------- lib/patchestry/Ghidra/PcodeTranslation.cpp | 17 +- tools/pcode-lifter/main.cpp | 17 +- 20 files changed, 346 insertions(+), 684 deletions(-) delete mode 100644 include/patchestry/Dialect/Pcode/Json.hpp delete mode 100644 include/patchestry/Dialect/Pcode/Pcode.hpp delete mode 100644 include/patchestry/Dialect/Pcode/PcodeDef.h diff --git a/CMakeLists.txt b/CMakeLists.txt index fca8f812..395d0936 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,4 @@ # Copyright (c) 2024, Trail of Bits, Inc. -# # This source code is licensed in accordance with the terms specified in the # LICENSE file found in the root directory of this source tree. 
diff --git a/include/patchestry/AST/Codegen.hpp b/include/patchestry/AST/Codegen.hpp index d8528c45..9b1c48c4 100644 --- a/include/patchestry/AST/Codegen.hpp +++ b/include/patchestry/AST/Codegen.hpp @@ -7,8 +7,11 @@ #pragma once +#include + #include #include + #include #include @@ -17,6 +20,9 @@ namespace llvm { } namespace patchestry::ast { + + using LocationMap = std::unordered_map< void *, std::string >; + class CodeGenerator { public: @@ -29,7 +35,9 @@ namespace patchestry::ast { virtual ~CodeGenerator() {} - void generate_source_ir(clang::ASTContext &ctx, llvm::raw_fd_ostream &os); + void generate_source_ir( + clang::ASTContext &ctx, const LocationMap &locations, llvm::raw_fd_ostream &os + ); private: vast::cc::action_options opts; diff --git a/include/patchestry/AST/FunctionBuilder.hpp b/include/patchestry/AST/FunctionBuilder.hpp index cd9c696f..d1da6e0d 100644 --- a/include/patchestry/AST/FunctionBuilder.hpp +++ b/include/patchestry/AST/FunctionBuilder.hpp @@ -7,17 +7,18 @@ #pragma once -#include "patchestry/AST/TypeBuilder.hpp" +#include +#include +#include + +#include #include #include #include #include -#include -#include -#include +#include #include -#include namespace patchestry::ast { class OpBuilder; @@ -88,6 +89,13 @@ namespace patchestry::ast { clang::FunctionDecl *create_definition(clang::ASTContext &ctx); + template< typename T > + void set_location_key(T *pointer, const std::string &key) { + if (!location_map.get().contains(pointer)) { + location_map.get().emplace(pointer, key); + } + } + private: void create_labels(clang::ASTContext &ctx, clang::FunctionDecl *func_decl); diff --git a/include/patchestry/AST/OperationBuilder.hpp b/include/patchestry/AST/OperationBuilder.hpp index a71f5cf9..f098895f 100644 --- a/include/patchestry/AST/OperationBuilder.hpp +++ b/include/patchestry/AST/OperationBuilder.hpp @@ -154,7 +154,7 @@ namespace patchestry::ast { private: clang::Stmt *create_assign_operation( clang::ASTContext &ctx, clang::Expr *input_expr, 
clang::Expr *output_expr, - clang::SourceLocation location + const std::string &location_key ); /** @@ -172,8 +172,10 @@ namespace patchestry::ast { clang::ASTContext &ctx, clang::Expr *expr, clang::QualType to_type ); - clang::Stmt * - create_varnode(clang::ASTContext &ctx, const Function &function, const Varnode &vnode); + clang::Stmt *create_varnode( + clang::ASTContext &ctx, const Function &function, const Varnode &vnode, + const std::string &op_key = "" + ); clang::Stmt *create_parameter(clang::ASTContext &ctx, const Varnode &vnode); diff --git a/include/patchestry/AST/TypeBuilder.hpp b/include/patchestry/AST/TypeBuilder.hpp index 4300c56b..e79f18e9 100644 --- a/include/patchestry/AST/TypeBuilder.hpp +++ b/include/patchestry/AST/TypeBuilder.hpp @@ -7,8 +7,10 @@ #pragma once -#include #include + +#include + #include namespace patchestry::ast { diff --git a/include/patchestry/Dialect/Pcode/Deserialize.hpp b/include/patchestry/Dialect/Pcode/Deserialize.hpp index 2156882c..7757b8b8 100644 --- a/include/patchestry/Dialect/Pcode/Deserialize.hpp +++ b/include/patchestry/Dialect/Pcode/Deserialize.hpp @@ -15,12 +15,6 @@ namespace patchestry::pc { - struct program; - struct function; - struct basic_block; - struct instruction; - struct pcode; - using json_arr = llvm::json::Array; using json_obj = llvm::json::Object; using json_val = llvm::json::Value; @@ -36,14 +30,10 @@ namespace patchestry::pc { bld.setInsertionPointToStart(&*reg.begin()); } - void process(const program &prog); - void process_function(const function &func); - void process_block(const basic_block &block); - void process_instruction(const instruction &inst); - void process_pcode(const pcode &code); - - mlir_operation create_int_const(uint32_t offset, uint32_t size); - mlir_operation create_varnode(std::string type, uint32_t offset, uint32_t size); + void process(const json_obj &json); + void process_function(const json_obj &json); + void process_block(const json_obj &json); + void 
process_instruction(const json_obj &json); }; mlir::OwningOpRef< mlir::ModuleOp > deserialize(const json_obj &json, mcontext_t *mctx); diff --git a/include/patchestry/Dialect/Pcode/Json.hpp b/include/patchestry/Dialect/Pcode/Json.hpp deleted file mode 100644 index 561c5fdd..00000000 --- a/include/patchestry/Dialect/Pcode/Json.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2024, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#pragma once - -#include "llvm/Support/JSON.h" -#include -#include -#include - -namespace patchestry::pc { - - struct pcode - { - std::string mnemonic; - - struct - { - std::string type; - std::optional< int64_t > offset; - std::optional< int64_t > size; - } output; - - struct input - { - std::string type; - std::optional< int64_t > offset; - std::optional< int64_t > size; - }; - - std::vector< input > inputs; - }; - - struct instruction - { - std::string mnemonic; - std::string address; - std::vector< pcode > pcodes; - }; - - struct basic_block - { - std::string label; - std::vector< instruction > instructions; - }; - - struct function - { - std::string name; - std::vector< basic_block > basic_blocks; - }; - - struct program - { - std::string arch; - std::string os; - std::vector< function > functions; - }; - - class json_parser - { - public: - std::optional< program > parse_program(const llvm::json::Object &root); - - private: - // Function to parse Pcode - std::optional< pcode > parse_pcode(const llvm::json::Object &pcode_obj); - - // Function to parse Instructions - std::optional< instruction > parse_instruction(const llvm::json::Object &inst_obj); - - // Function to parse Basic Blocks - std::optional< basic_block > parse_basic_block(const llvm::json::Object &block_obj); - - // Function to parse Functions - std::optional< function > parse_function(const llvm::json::Object 
&func_obj); - }; - -} // namespace patchestry::pc diff --git a/include/patchestry/Dialect/Pcode/Pcode.hpp b/include/patchestry/Dialect/Pcode/Pcode.hpp deleted file mode 100644 index 215340c9..00000000 --- a/include/patchestry/Dialect/Pcode/Pcode.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2024, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "PcodeDef.h" - -namespace patchestry::pc { - -enum class PCodeMnemonic { -#define X(name) name, - PCODE_MNEMONIC_LIST -#undef X - UNKNOWN -}; - -enum class PCodeVarnodeType { -#define X(name) name##_, - PCODE_VARNODE_TYPE -#undef X - UNKNOWN -}; - -template -struct PCodeStringMapper { - std::array, N> mappings; - - //Convert enum to string - constexpr std::string_view to_string(EnumType val) const { - for (const auto& [pcode, str] : mappings) { - if (pcode == val) { - return str; - } - } - return "UNKNOWN"; - } - - constexpr EnumType from_string(std::string_view s) const { - for (const auto& [val, str] : mappings) { - if (str == s) { - return val; - } - } - return EnumType::UNKNOWN; - } -}; - -// Calculate the number of mnemonics -constexpr size_t NumPCodeMnemonics = []() constexpr { - size_t count = 0; -#define X(name) ++count; - PCODE_MNEMONIC_LIST -#undef X - return count; -}(); - -constexpr size_t NumVarNodeType = []() constexpr { - size_t count = 0; -#define X(name) ++count; - PCODE_VARNODE_TYPE -#undef X - return count; -}(); - -// Instantiate the EnumStringMapper for PCodeMnemonic -constexpr PCodeStringMapper PCodeMnemonicMapper{{ -#define X(name) std::pair{PCodeMnemonic::name, #name}, - PCODE_MNEMONIC_LIST -#undef X -}}; - -constexpr PCodeStringMapper PCodeVarNodeMapper{{ -#define X(name) std::pair{PCodeVarnodeType::name##_, #name}, - PCODE_VARNODE_TYPE -#undef X -}}; - 
-constexpr std::string_view to_string(PCodeMnemonic mnemonic) { - return PCodeMnemonicMapper.to_string(mnemonic); -} - -constexpr PCodeMnemonic from_string(llvm::StringRef mnemonic_str) { - return PCodeMnemonicMapper.from_string(mnemonic_str); -} - -constexpr std::string_view varnode_to_string(PCodeVarnodeType ty) { - return PCodeVarNodeMapper.to_string(ty); -} - -constexpr PCodeVarnodeType varnode_from_string(llvm::StringRef ty_str) { - return PCodeVarNodeMapper.from_string(ty_str); -} - -} \ No newline at end of file diff --git a/include/patchestry/Dialect/Pcode/PcodeDef.h b/include/patchestry/Dialect/Pcode/PcodeDef.h deleted file mode 100644 index bffb863d..00000000 --- a/include/patchestry/Dialect/Pcode/PcodeDef.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2024, Trail of Bits, Inc. - * All rights reserved. - * - * This source code is licensed in accordance with the terms specified in - * the LICENSE file found in the root directory of this source tree. - */ - - // Definition of ghidra pcode mnemonics - - #pragma once - - #define PCODE_MNEMONIC_LIST \ - X(COPY) \ - X(LOAD) \ - X(STORE) \ - X(BRANCH) \ - X(CBRANCH) \ - X(BRANCHIND) \ - X(CALL) \ - X(CALLIND) \ - X(USERDEFINED) \ - X(RETURN) \ - X(PIECE) \ - X(SUBPIECE) \ - X(INT_EQUAL) \ - X(INT_NOTEQUAL) \ - X(INT_LESS) \ - X(INT_SLESS) \ - X(INT_LESSEQUAL) \ - X(INT_SLESSEQUAL) \ - X(INT_ZEXT) \ - X(INT_SEXT) \ - X(INT_ADD) \ - X(INT_SUB) \ - X(INT_CARRY) \ - X(INT_SCARRY) \ - X(INT_SBORROW) \ - X(INT_2COMP) \ - X(INT_NEGATE) \ - X(INT_XOR) \ - X(INT_AND) \ - X(INT_OR) \ - X(INT_LEFT) \ - X(INT_RIGHT) \ - X(INT_SRIGHT) \ - X(INT_MULT) \ - X(INT_DIV) \ - X(INT_REM) \ - X(INT_SDIV) \ - X(INT_SREM) \ - X(BOOL_NEGATE) \ - X(BOOL_OR) \ - X(FLOAT_EQUAL) \ - X(FLOAT_NOTEQUAL) \ - X(FLOAT_LESS) \ - X(FLOAT_LESSEQUAL) \ - X(FLOAT_ADD) \ - X(FLOAT_SUB) \ - X(FLOAT_MULT) \ - X(FLOAT_DIV) \ - X(FLOAT_NEG) \ - X(FLOAT_ABS) \ - X(FLOAT_SQRT) \ - X(FLOAT_CEIL) \ - X(FLOAT_FLOOR) \ - X(FLOAT_ROUND) \ - X(FLOAT_NAN) \ 
- X(INT2FLOAT) \ - X(FLOAT2FLOAT) \ - X(TRUNC) - -#define PCODE_VARNODE_TYPE \ - X(unique) \ - X(const) \ - X(register) \ - X(ram) \ No newline at end of file diff --git a/include/patchestry/Dialect/Pcode/PcodeOps.td b/include/patchestry/Dialect/Pcode/PcodeOps.td index 5779ac25..ae5eb468 100644 --- a/include/patchestry/Dialect/Pcode/PcodeOps.td +++ b/include/patchestry/Dialect/Pcode/PcodeOps.td @@ -34,7 +34,7 @@ def Pcode_BlockOp } def Pcode_InstOp - : Pcode_Op< "instruction", [NoTerminator] > + : Pcode_Op< "instruction" > , Arguments<( ins StrAttr:$inst_mnemonic )> { let regions = (region SizedRegion<1>:$semantics); diff --git a/include/patchestry/Ghidra/PcodeTypes.hpp b/include/patchestry/Ghidra/PcodeTypes.hpp index 848b4570..8e53af28 100644 --- a/include/patchestry/Ghidra/PcodeTypes.hpp +++ b/include/patchestry/Ghidra/PcodeTypes.hpp @@ -9,7 +9,7 @@ #include -#include "llvm/Support/JSON.h" +#include namespace patchestry::ghidra { using JsonArray = llvm::json::Array; diff --git a/lib/patchestry/AST/ASTConsumer.cpp b/lib/patchestry/AST/ASTConsumer.cpp index 1049597a..ba803b2f 100644 --- a/lib/patchestry/AST/ASTConsumer.cpp +++ b/lib/patchestry/AST/ASTConsumer.cpp @@ -64,7 +64,7 @@ namespace patchestry::ast { llvm::errs() << "Generate mlir\n"; llvm::raw_fd_ostream file_os(outfile + ".mlir", ec); - codegen->generate_source_ir(ctx, file_os); + codegen->generate_source_ir(ctx, location_map, file_os); } void PcodeASTConsumer::set_sema_context(clang::DeclContext *dc) { sema().CurContext = dc; } @@ -111,6 +111,7 @@ namespace patchestry::ast { var_decl->setDeclContext(ctx.getTranslationUnitDecl()); ctx.getTranslationUnitDecl()->addDecl(var_decl); global_variable_declarations.emplace(variable.key, var_decl); + location_map.emplace(var_decl, variable.key); } } diff --git a/lib/patchestry/AST/Codegen.cpp b/lib/patchestry/AST/Codegen.cpp index 77a74a22..d54e2fc2 100644 --- a/lib/patchestry/AST/Codegen.cpp +++ b/lib/patchestry/AST/Codegen.cpp @@ -99,39 +99,79 @@ namespace 
patchestry::ast { struct MetaGen final : vast::cg::meta_generator { - MetaGen(clang::ASTContext *actx, mlir::MLIRContext *mctx) : actx(actx), mctx(mctx) {} + MetaGen(clang::ASTContext *actx, mlir::MLIRContext *mctx, const LocationMap &locs) + : actx(actx), mctx(mctx), locations(locs) {} + + void *raw_pointer(const clang::Decl *decl) const { + return static_cast< void * >(const_cast< clang::Decl * >(decl)); + } + + void *raw_pointer(const clang::Stmt *stmt) const { + return static_cast< void * >(const_cast< clang::Stmt * >(stmt)); + } + + void *raw_pointer(const clang::Expr *expr) const { + return static_cast< void * >(const_cast< clang::Expr * >(expr)); + } mlir::Location location(const clang::Decl *decl) const override { - return location(decl->getLocation()); + return location(raw_pointer(decl), decl->getLocation()); } mlir::Location location(const clang::Stmt *stmt) const override { - return location(stmt->getBeginLoc()); + return location(raw_pointer(stmt), stmt->getBeginLoc()); } mlir::Location location(const clang::Expr *expr) const override { - return location(expr->getExprLoc()); + return location(raw_pointer(expr), expr->getExprLoc()); } private: - mlir::Location location(const clang::SourceLocation &loc) const { + uint64_t address_from_location(const std::string &str, char delimiter) const { + std::stringstream ss(str); + std::string token; + int count = 0; + + while (std::getline(ss, token, delimiter)) { + ++count; + if (count == 2) { + return std::stoi(token, nullptr, 16); + } + } + + return 0; + } + + mlir::Location location(void *data, const clang::SourceLocation &loc) const { (void) loc; (void) actx; - auto attr = vast::meta::IdentifierAttr::get(mctx, 0); - return mlir::FusedLoc::get({}, attr, mctx); + if (locations.contains(data)) { + const auto &location_str = locations.at(data); + mlir::StringAttr string_attr = mlir::StringAttr::get(mctx, location_str); + mlir::DictionaryAttr metadata = mlir::DictionaryAttr::get( + mctx, + { + 
{mlir::StringAttr::get(mctx, "pcode"), string_attr} + } + ); + return mlir::FusedLoc::get({}, metadata, mctx); + } + return mlir::UnknownLoc::get(mctx); } clang::ASTContext *actx; mlir::MLIRContext *mctx; + const LocationMap &locations; }; namespace { - std::optional< vast::owning_mlir_module_ref > - create_module(clang::ASTContext &ctx, vast::cc::action_options &opts) { + std::optional< vast::owning_mlir_module_ref > create_module( + clang::ASTContext &ctx, const LocationMap &locations, vast::cc::action_options &opts + ) { auto &mctx = kMLIR.context(); auto bld = vast::cg::mk_codegen_builder(mctx); - auto mg = std::make_shared< MetaGen >(&ctx, &mctx); + auto mg = std::make_shared< MetaGen >(&ctx, &mctx, locations); auto sg = std::make_shared< vast::cg::default_symbol_generator >(ctx.createMangleContext() ); @@ -159,8 +199,10 @@ namespace patchestry::ast { } } // namespace - void CodeGenerator::generate_source_ir(clang::ASTContext &ctx, llvm::raw_fd_ostream &os) { - auto mod = create_module(ctx, opts); + void CodeGenerator::generate_source_ir( + clang::ASTContext &ctx, const LocationMap &locations, llvm::raw_fd_ostream &os + ) { + auto mod = create_module(ctx, locations, opts); auto flags = mlir::OpPrintingFlags(); flags.enableDebugInfo(true, false); (*mod)->print(os, flags); diff --git a/lib/patchestry/AST/FunctionBuilder.cpp b/lib/patchestry/AST/FunctionBuilder.cpp index 766278f0..db5a5742 100644 --- a/lib/patchestry/AST/FunctionBuilder.cpp +++ b/lib/patchestry/AST/FunctionBuilder.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -20,7 +21,6 @@ #include #include #include -#include namespace patchestry::ast { @@ -91,6 +91,7 @@ namespace patchestry::ast { if (auto *function_decl = create_declaration(ci.getASTContext())) { prev_decl = function_decl; function_list.get().emplace(function.key, prev_decl); + location_map.get().emplace(prev_decl, function.key); } } } @@ -175,6 +176,7 @@ namespace patchestry::ast { ); 
parameter_vec.push_back(param_decl); local_variables.emplace(param_op->key, param_decl); + location_map.get().emplace(param_decl, param_op->key); } func_decl->setParams(parameter_vec); @@ -294,6 +296,15 @@ namespace patchestry::ast { return parameter_vec; } + /** + * @brief Creates a `clang::FunctionDecl` representing the definition of a function, + * including its body. + * + * @param ctx The `clang::ASTContext` used to create AST nodes. + * @return A pointer to the created `clang::FunctionDecl` representing the function + * definition. Returns `nullptr` if the function name is empty, the function has no basic + * blocks, or if the function definition cannot be created. + */ clang::FunctionDecl *FunctionBuilder::create_definition(clang::ASTContext &ctx) { if (function.get().name.empty() || function.get().basic_blocks.empty()) { LOG(ERROR) << "Can't create function definition. Missing function name or has no " @@ -328,6 +339,12 @@ namespace patchestry::ast { return function_def; } + /** + * @brief Creates and registers label declarations for basic blocks in a function. + * + * @param ctx The Clang ASTContext used to create label declarations. + * @param func_decl The Clang function declaration to which the labels belong. + */ void FunctionBuilder::create_labels(clang::ASTContext &ctx, clang::FunctionDecl *func_decl) { if (function.get().basic_blocks.empty()) { @@ -357,9 +374,24 @@ namespace patchestry::ast { } labels_declaration.emplace(key, label_decl); + location_map.get().emplace(label_decl, key); } } + /** + * @brief Creates the body of a function in the form of a vector of Clang statements. + * + * This method constructs the Abstract Syntax Tree (AST) for a function body based on + * its basic blocks. It processes the function's entry block first, then processes + * remaining basic blocks, assigning labels to each block and appending the generated + * statements in sequence. + * + * @param ctx The ASTContext object used for managing AST nodes. 
+ * @param func_decl The function declaration associated with the function body. + * + * @return A vector of Clang statements representing the function body. + * Returns an empty vector if the function has no basic blocks. + */ std::vector< clang::Stmt * > FunctionBuilder::create_function_body( clang::ASTContext &ctx, clang::FunctionDecl *func_decl ) { @@ -406,6 +438,20 @@ namespace patchestry::ast { return stmt_vec; } + /** + * @brief Generates a vector of `clang::Stmt*` representing the operations in a basic block. + * + * This function iterates over the `ordered_operations` of a `BasicBlock` object to create + * corresponding `clang::Stmt` objects. Each statement is created using the + * `create_operation` method and added to the vector `stmt_vec` unless the operation is + * flagged to merge with the next. + * + * @param ctx The Clang ASTContext used for creating statements. + * @param block The BasicBlock containing the operations to be processed. + * + * @return A vector of `clang::Stmt*` representing the operations in the basic block. + * + */ std::vector< clang::Stmt * > FunctionBuilder::create_basic_block(clang::ASTContext &ctx, const BasicBlock &block) { if (block.ordered_operations.empty()) { @@ -424,6 +470,7 @@ namespace patchestry::ast { const auto &operation = block.operations.at(operation_key); if (auto [stmt, should_merge_to_next] = create_operation(ctx, operation); stmt) { operation_stmts.emplace(operation.key, stmt); + location_map.get().emplace(stmt, operation.key); if (!should_merge_to_next) { stmt_vec.push_back(stmt); } @@ -433,6 +480,18 @@ namespace patchestry::ast { return stmt_vec; } + /** + * @brief Creates a Clang AST representation of a given operation based on its mnemonic. + * + * This function takes an operation and generates the corresponding Clang AST node, + * which represents the operation in the target AST. + * + * @param ctx The Clang ASTContext for creating AST nodes. 
+ * @param op The operation to be translated into a Clang AST node. + * @return A pair consisting of: + * - A pointer to the generated Clang Stmt representing the operation. + * - A boolean indicating if stmt should be merged into the next one. + */ std::pair< clang::Stmt *, bool > FunctionBuilder::create_operation(clang::ASTContext &ctx, const Operation &op) { if (op.mnemonic == Mnemonic::OP_UNKNOWN) { diff --git a/lib/patchestry/AST/OperationBuilder.cpp b/lib/patchestry/AST/OperationBuilder.cpp index 7a16964f..9a45e204 100644 --- a/lib/patchestry/AST/OperationBuilder.cpp +++ b/lib/patchestry/AST/OperationBuilder.cpp @@ -64,26 +64,37 @@ namespace patchestry::ast { } clang::Stmt *OpBuilder::create_varnode( - clang::ASTContext &ctx, const Function &function, const Varnode &vnode + clang::ASTContext &ctx, const Function &function, const Varnode &vnode, + const std::string &op_key ) { - switch (vnode.kind) { - case Varnode::VARNODE_UNKNOWN: - break; - case Varnode::VARNODE_GLOBAL: - return create_global(ctx, vnode); - case Varnode::VARNODE_PARAM: - return create_parameter(ctx, vnode); - case Varnode::VARNODE_FUNCTION: - return create_function(ctx, vnode); - case Varnode::VARNODE_LOCAL: - return create_local(ctx, function, vnode); - case Varnode::VARNODE_TEMPORARY: - return create_temporary(ctx, function, vnode); - case Varnode::VARNODE_CONSTANT: - return create_constant(ctx, vnode); + auto varnode_operation = [&](clang::ASTContext &ctx, const Function &function, + const Varnode &vnode) -> clang::Stmt * { + switch (vnode.kind) { + case Varnode::VARNODE_UNKNOWN: + break; + case Varnode::VARNODE_GLOBAL: + return create_global(ctx, vnode); + case Varnode::VARNODE_PARAM: + return create_parameter(ctx, vnode); + case Varnode::VARNODE_FUNCTION: + return create_function(ctx, vnode); + case Varnode::VARNODE_LOCAL: + return create_local(ctx, function, vnode); + case Varnode::VARNODE_TEMPORARY: + return create_temporary(ctx, function, vnode); + case Varnode::VARNODE_CONSTANT: + 
return create_constant(ctx, vnode); + } + + return nullptr; + }; + + if (auto *expr = varnode_operation(ctx, function, vnode)) { + function_builder().set_location_key(expr, op_key); + return expr; } - return nullptr; + return {}; } clang::Stmt *OpBuilder::create_parameter(clang::ASTContext &ctx, const Varnode &vnode) { @@ -142,6 +153,9 @@ namespace patchestry::ast { if (auto maybe_operation = operation_from_key(function, vnode.operation.value())) { auto [stmt, _] = function_builder().create_operation(ctx, *maybe_operation); + if (stmt != nullptr) { + function_builder().location_map.get().emplace(stmt, maybe_operation->key); + } return stmt; } @@ -157,10 +171,12 @@ namespace patchestry::ast { if (function_builder().function_list.get().contains(*vnode.function)) { auto *function_decl = function_builder().function_list.get().at(*vnode.function); - return clang::DeclRefExpr::Create( + auto *function_ref = clang::DeclRefExpr::Create( ctx, clang::NestedNameSpecifierLoc(), clang::SourceLocation(), function_decl, false, clang::SourceLocation(), function_decl->getType(), clang::VK_PRValue ); + function_builder().set_location_key(function_ref, *vnode.function); + return function_ref; } return {}; diff --git a/lib/patchestry/AST/OperationStmt.cpp b/lib/patchestry/AST/OperationStmt.cpp index 84011dd9..310af342 100644 --- a/lib/patchestry/AST/OperationStmt.cpp +++ b/lib/patchestry/AST/OperationStmt.cpp @@ -116,7 +116,7 @@ namespace patchestry::ast { clang::Stmt *OpBuilder::create_assign_operation( clang::ASTContext &ctx, clang::Expr *input_expr, clang::Expr *output_expr, - clang::SourceLocation location + const std::string &location_key ) { if ((input_expr == nullptr) || (output_expr == nullptr)) { return {}; @@ -127,9 +127,14 @@ namespace patchestry::ast { // Handle exact type match: no cast required if (ctx.hasSameUnqualifiedType(input_type, output_type)) { - auto assign_operation = - sema().CreateBuiltinBinOp(location, clang::BO_Assign, output_expr, input_expr); + auto 
assign_operation = sema().CreateBuiltinBinOp( + source_location_from_key(ctx, location_key), clang::BO_Assign, output_expr, + input_expr + ); assert(!assign_operation.isInvalid()); + function_builder().set_location_key( + assign_operation.getAs< clang::Expr >(), location_key + ); return assign_operation.getAs< clang::Stmt >(); } @@ -141,9 +146,13 @@ namespace patchestry::ast { assert(!casted_expr.isInvalid()); auto assign_operation = sema().CreateBuiltinBinOp( - location, clang::BO_Assign, output_expr, casted_expr.getAs< clang::Expr >() + source_location_from_key(ctx, location_key), clang::BO_Assign, output_expr, + casted_expr.getAs< clang::Expr >() ); assert(!assign_operation.isInvalid()); + function_builder().set_location_key( + assign_operation.getAs< clang::Expr >(), location_key + ); return assign_operation.getAs< clang::Stmt >(); } @@ -158,10 +167,13 @@ namespace patchestry::ast { if (!implicit_cast.isInvalid()) { auto assign_operation = sema().CreateBuiltinBinOp( - location, clang::BO_Assign, output_expr, + source_location_from_key(ctx, location_key), clang::BO_Assign, output_expr, implicit_cast.getAs< clang::Expr >() ); assert(!assign_operation.isInvalid()); + function_builder().set_location_key( + assign_operation.getAs< clang::Expr >(), location_key + ); return assign_operation.getAs< clang::Stmt >(); } } @@ -175,11 +187,13 @@ namespace patchestry::ast { ); if (!implicit_cast.isInvalid()) { auto assign_operation = sema().CreateBuiltinBinOp( - location, clang::BO_Assign, output_expr, + source_location_from_key(ctx, location_key), clang::BO_Assign, output_expr, implicit_cast.getAs< clang::Expr >() ); assert(!assign_operation.isInvalid()); - + function_builder().set_location_key( + assign_operation.getAs< clang::Expr >(), location_key + ); return assign_operation.getAs< clang::Stmt >(); } } @@ -188,6 +202,7 @@ namespace patchestry::ast { auto addr_of_expr = sema().CreateBuiltinUnaryOp(clang::SourceLocation(), clang::UO_AddrOf, input_expr); 
assert(!addr_of_expr.isInvalid()); + function_builder().set_location_key(addr_of_expr.getAs< clang::Expr >(), location_key); auto to_pointer_type = ctx.getPointerType(output_expr->getType()); auto casted_expr = sema().BuildCStyleCastExpr( @@ -195,16 +210,22 @@ namespace patchestry::ast { clang::SourceLocation(), addr_of_expr.getAs< clang::Expr >() ); assert(!casted_expr.isInvalid()); + function_builder().set_location_key(casted_expr.getAs< clang::Expr >(), location_key); auto derefed_expr = sema().CreateBuiltinUnaryOp( clang::SourceLocation(), clang::UO_Deref, casted_expr.getAs< clang::Expr >() ); assert(!derefed_expr.isInvalid()); + function_builder().set_location_key(derefed_expr.getAs< clang::Expr >(), location_key); auto assign_operation = sema().CreateBuiltinBinOp( - location, clang::BO_Assign, output_expr, derefed_expr.getAs< clang::Expr >() + source_location_from_key(ctx, location_key), clang::BO_Assign, output_expr, + derefed_expr.getAs< clang::Expr >() ); assert(!assign_operation.isInvalid()); + function_builder().set_location_key( + assign_operation.getAs< clang::Expr >(), location_key + ); return assign_operation.getAs< clang::Stmt >(); } @@ -217,8 +238,9 @@ namespace patchestry::ast { return { nullptr, false }; } - auto *input_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs.front())); + auto *input_expr = clang::dyn_cast< clang::Expr >( + create_varnode(ctx, function, op.inputs.front(), op.key) + ); if (input_expr == nullptr) { LOG(ERROR) << "Failed to create input expression for copy operaion. key: " << op.key << "\n"; @@ -232,17 +254,14 @@ namespace patchestry::ast { } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); if (output_expr == nullptr) { LOG(ERROR) << "Failed to create output expression for copy operaion. 
key: " << op.key << "\n"; return { nullptr, false }; } - return { create_assign_operation( - ctx, input_expr, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, input_expr, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_load( @@ -255,7 +274,7 @@ namespace patchestry::ast { auto merge_to_next = !op.output.has_value(); auto *input_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); if (input_expr == nullptr) { LOG(ERROR) << "Skipping, load operation with invalid expression. key: " << op.key << "\n"; @@ -273,12 +292,9 @@ namespace patchestry::ast { auto *result_expr = derefed_expr.getAs< clang::Expr >(); auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { create_assign_operation( - ctx, result_expr, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, result_expr, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_store( @@ -290,11 +306,13 @@ namespace patchestry::ast { } if (op.inputs.size() == 2) { - auto *lhs_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + auto *lhs_expr = clang::dyn_cast< clang::Expr >( + create_varnode(ctx, function, op.inputs[0], op.key) + ); - auto *rhs_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1])); + auto *rhs_expr = clang::dyn_cast< clang::Expr >( + create_varnode(ctx, function, op.inputs[1], op.key) + ); auto deref_result = sema().CreateBuiltinUnaryOp( clang::SourceLocation(), clang::UO_Deref, @@ -303,8 +321,7 @@ namespace patchestry::ast { assert(!deref_result.isInvalid()); return { create_assign_operation( - ctx, rhs_expr, 
deref_result.getAs< clang::Expr >(), - source_location_from_key(ctx, op.key) + ctx, rhs_expr, deref_result.getAs< clang::Expr >(), op.key ), false }; } @@ -335,14 +352,18 @@ namespace patchestry::ast { return {}; } - return std::make_pair( - new (ctx) clang::GotoStmt( - function_builder().labels_declaration.at(*op.target_block), - source_location_from_key(ctx, op.key), - source_location_from_key(ctx, *op.target_block) - ), - false + auto *expr = new (ctx) clang::GotoStmt( + function_builder().labels_declaration.at(*op.target_block), + source_location_from_key(ctx, op.key), + source_location_from_key(ctx, *op.target_block) ); + if (expr == nullptr) { + LOG(ERROR) << "Failed to create goto statement. key " << op.key << "\n"; + return {}; + } + + function_builder().set_location_key(expr, op.key); + return { expr, false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_cbranch( @@ -357,7 +378,8 @@ namespace patchestry::ast { // TODO(kumarak): Could there be case where conditional statement is missing?? In // such case treat it as branch instruction. 
auto *condition_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.condition)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.condition, op.key) + ); clang::Stmt *taken_stmt = nullptr; clang::Stmt *not_taken_stmt = nullptr; @@ -386,6 +408,10 @@ namespace patchestry::ast { not_taken_stmt = new (ctx) clang::NullStmt(clang::SourceLocation(), false); } + function_builder().set_location_key(condition_expr, op.key); + function_builder().set_location_key(taken_stmt, op.key); + function_builder().set_location_key(not_taken_stmt, op.key); + return std::make_pair( clang::IfStmt::Create( ctx, clang::SourceLocation(), clang::IfStatementKind::Ordinary, nullptr, @@ -422,7 +448,7 @@ namespace patchestry::ast { std::vector< clang::Expr * > arguments; for (const auto &input : op.inputs) { auto *arg_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, input)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, input, op.key)); arguments.push_back(clang::dyn_cast< clang::Expr >(arg_expr)); } @@ -435,6 +461,7 @@ namespace patchestry::ast { function_builder().function_list.get().at(*op.target->function); call_expr = create_function_call(ctx, call_target, arguments); + function_builder().set_location_key(call_expr, op.key); if (!op.output || call_target->getReturnType()->isVoidType()) { return std::make_pair(clang::dyn_cast< clang::Expr >(call_expr), false); } @@ -446,25 +473,30 @@ namespace patchestry::ast { nullptr, clang::dyn_cast< clang::Expr >(stmt), clang::SourceLocation(), arguments, clang::SourceLocation() ); + assert(!result.isInvalid()); call_expr = result.getAs< clang::Expr >(); + function_builder().set_location_key(call_expr, op.key); if (!operation->output || call_expr->getType()->isVoidType()) { return std::make_pair(clang::dyn_cast< clang::Expr >(call_expr), false); } } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr 
>(create_varnode(ctx, function, *op.output, op.key)); auto rty_type = type_builder().get_serialized_types().at(*op.type); auto casted_result = sema().ImpCastExprToType(call_expr, rty_type, clang::CastKind::CK_BitCast); + assert(!casted_result.isInvalid()); + function_builder().set_location_key(casted_result.getAs< clang::Expr >(), op.key); auto result = sema().CreateBuiltinBinOp( source_location_from_key(ctx, op.key), clang::BO_Assign, output_expr, casted_result.getAs< clang::Expr >() ); assert(!result.isInvalid()); + function_builder().set_location_key(result.getAs< clang::Expr >(), op.key); return { result.getAs< clang::Expr >(), false }; } @@ -488,7 +520,7 @@ namespace patchestry::ast { ) { if (!op.inputs.empty()) { auto varnode = op.inputs.size() == 1 ? op.inputs.front() : op.inputs.at(1); - auto *ret_expr = create_varnode(ctx, function, varnode); + auto *ret_expr = create_varnode(ctx, function, varnode, op.key); return std::make_pair( clang::ReturnStmt::Create( ctx, clang::SourceLocation(), llvm::dyn_cast< clang::Expr >(ret_expr), @@ -518,9 +550,9 @@ namespace patchestry::ast { } auto *input0_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); auto *input1_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1], op.key)); // TODO(kumarak): It should be the size of input1 field in bits; At the moment consider // it as 4 bytes but should be fixed. 
@@ -552,11 +584,10 @@ namespace patchestry::ast { return std::make_pair(or_result.getAs< clang::Expr >(), merge_to_next); } - auto *output_expr = create_varnode(ctx, function, *op.output); + auto *output_expr = create_varnode(ctx, function, *op.output, op.key); return { create_assign_operation( ctx, or_result.getAs< clang::Expr >(), - clang::dyn_cast< clang::Expr >(output_expr), - source_location_from_key(ctx, op.key) + clang::dyn_cast< clang::Expr >(output_expr), op.key ), false }; } @@ -580,10 +611,10 @@ namespace patchestry::ast { const auto &op_type = type_builder().get_serialized_types().at(*op.type); auto *shift_value = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1], op.key)); auto *expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); if (!ctx.hasSameUnqualifiedType(expr->getType(), op_type)) { if (auto *casted_expr = perform_explicit_cast(ctx, expr, op_type)) { @@ -625,16 +656,18 @@ namespace patchestry::ast { assert(false); return std::make_pair(nullptr, false); } + function_builder().set_location_key(result.getAs< clang::Expr >(), op.key); auto *result_expr = new (ctx) clang::ParenExpr( clang::SourceLocation(), clang::SourceLocation(), result.getAs< clang::Expr >() ); + function_builder().set_location_key(result_expr, op.key); if (merge_to_next) { return std::make_pair(result_expr, merge_to_next); } - auto *out_expr = create_varnode(ctx, function, *op.output); + auto *out_expr = create_varnode(ctx, function, *op.output, op.key); auto out_result = sema().CreateBuiltinBinOp( source_location_from_key(ctx, op.key), clang::BO_Assign, clang::dyn_cast< clang::Expr >(out_expr), result_expr @@ -659,7 +692,7 @@ namespace patchestry::ast { auto merge_to_next = !op.output.has_value(); auto *input_expr = - clang::dyn_cast< clang::Expr 
>(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); auto target_type = type_builder().get_serialized_types().at(*op.type); @@ -678,18 +711,16 @@ namespace patchestry::ast { } else { input_expr = implicit_result.getAs< clang::Expr >(); } + function_builder().set_location_key(input_expr, op.key); if (merge_to_next) { return { input_expr, merge_to_next }; } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { create_assign_operation( - ctx, input_expr, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, input_expr, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_int_sext( @@ -703,7 +734,7 @@ namespace patchestry::ast { auto merge_to_next = !op.output.has_value(); auto *input_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); auto target_type = type_builder().get_serialized_types().at(*op.type); @@ -723,17 +754,16 @@ namespace patchestry::ast { input_expr = implicit_result.getAs< clang::Expr >(); } + function_builder().set_location_key(input_expr, op.key); + if (merge_to_next) { return { input_expr, merge_to_next }; } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { create_assign_operation( - ctx, input_expr, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, input_expr, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_int_carry( @@ -776,23 +806,23 @@ namespace patchestry::ast { } auto *input_expr = - 
clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); // TODO(kumarak): Should check the operation type before creating unary operation??? auto unary_operation = sema().CreateBuiltinUnaryOp(clang::SourceLocation(), kind, input_expr); assert(!unary_operation.isInvalid()); + function_builder().set_location_key(unary_operation.getAs< clang::Expr >(), op.key); if (!op.output.has_value()) { return { unary_operation.getAs< clang::Stmt >(), true }; } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); return { create_assign_operation( - ctx, unary_operation.getAs< clang::Expr >(), output_expr, - source_location_from_key(ctx, op.key) + ctx, unary_operation.getAs< clang::Expr >(), output_expr, op.key ), false }; } @@ -807,29 +837,30 @@ namespace patchestry::ast { return {}; } - auto *lhs = clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + auto *lhs = + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); - auto *rhs = clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1])); + auto *rhs = + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1], op.key)); auto result = sema().CreateBuiltinBinOp( source_location_from_key(ctx, op.key), kind, clang::dyn_cast< clang::Expr >(lhs), clang::dyn_cast< clang::Expr >(rhs) ); - assert(!result.isInvalid() && "Invalid result from binary operation"); + function_builder().set_location_key(result.getAs< clang::Expr >(), op.key); if (!op.output) { return std::make_pair(result.getAs< clang::Stmt >(), true); } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { 
create_assign_operation( - ctx, result.getAs< clang::Expr >(), output_expr, - source_location_from_key(ctx, op.key) - ), - false }; + return { + create_assign_operation(ctx, result.getAs< clang::Expr >(), output_expr, op.key), + false + }; } std::pair< clang::Stmt *, bool > OpBuilder::create_float_abs( @@ -907,7 +938,7 @@ namespace patchestry::ast { const auto &op_type = type_builder().get_serialized_types().at(*op.type); auto *input_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); auto implicit_cast_result = sema().PerformImplicitConversion(input_expr, op_type, clang::Sema::AA_Converting); @@ -920,12 +951,9 @@ namespace patchestry::ast { } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { create_assign_operation( - ctx, implicit_cast, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, implicit_cast, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_ptrsub( @@ -946,7 +974,7 @@ namespace patchestry::ast { const auto &op_type = type_builder().get_serialized_types().at(*op.type); auto *input_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); auto implicit_cast = sema().PerformImplicitConversion(input_expr, op_type, clang::Sema::AA_Converting); @@ -955,7 +983,7 @@ namespace patchestry::ast { auto *ptr_expr = implicit_cast.getAs< clang::Expr >(); auto *byte_offset = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1], op.key)); auto add_result = sema().CreateBuiltinBinOp( 
source_location_from_key(ctx, op.key), clang::BO_Add, ptr_expr, byte_offset @@ -968,12 +996,9 @@ namespace patchestry::ast { } auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { create_assign_operation( - ctx, ptr_add_expr, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, ptr_add_expr, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > OpBuilder::create_ptradd( @@ -988,11 +1013,11 @@ namespace patchestry::ast { auto merge_to_next = !op.output.has_value(); auto *base = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); auto *index = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[1], op.key)); auto *scale = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[2])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[2], op.key)); auto mult_result = sema().CreateBuiltinBinOp(clang::SourceLocation(), clang::BO_Mul, index, scale); @@ -1007,7 +1032,7 @@ namespace patchestry::ast { return std::make_pair(add_result.getAs< clang::Stmt >(), merge_to_next); } - auto *output_stmt = create_varnode(ctx, function, *op.output); + auto *output_stmt = create_varnode(ctx, function, *op.output, op.key); if (output_stmt->getStmtClass() == clang::Stmt::DeclStmtClass) { auto *decl = clang::dyn_cast< clang::DeclStmt >(output_stmt)->getSingleDecl(); auto *ref_expr = clang::DeclRefExpr::Create( @@ -1017,16 +1042,14 @@ namespace patchestry::ast { ); return { create_assign_operation( - ctx, add_result.getAs< clang::Expr >(), ref_expr, - source_location_from_key(ctx, op.key) + ctx, add_result.getAs< clang::Expr >(), ref_expr, 
op.key ), false }; } auto *output_expr = clang::dyn_cast< clang::Expr >(output_stmt); return { create_assign_operation( - ctx, add_result.getAs< clang::Expr >(), output_expr, - source_location_from_key(ctx, op.key) + ctx, add_result.getAs< clang::Expr >(), output_expr, op.key ), false }; } @@ -1048,7 +1071,7 @@ namespace patchestry::ast { const auto &op_type = type_builder().get_serialized_types().at(*op.type); auto *input_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0])); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); if (!op.output && ctx.hasSameUnqualifiedType(op_type, input_expr->getType())) { return { input_expr, true }; @@ -1067,12 +1090,9 @@ namespace patchestry::ast { auto *casted_expr = casted_result.getAs< clang::Expr >(); auto *output_expr = - clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output)); + clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, *op.output, op.key)); - return { create_assign_operation( - ctx, casted_expr, output_expr, source_location_from_key(ctx, op.key) - ), - false }; + return { create_assign_operation(ctx, casted_expr, output_expr, op.key), false }; } std::pair< clang::Stmt *, bool > @@ -1095,6 +1115,7 @@ namespace patchestry::ast { // add variable declaration to list for future references function_builder().local_variables.emplace(op.key, var_decl); + function_builder().location_map.get().emplace(var_decl, op.key); return std::make_pair(create_decl_stmt(ctx, var_decl), false); } diff --git a/lib/patchestry/AST/TypeBuilder.cpp b/lib/patchestry/AST/TypeBuilder.cpp index 88bff984..30b9cf26 100644 --- a/lib/patchestry/AST/TypeBuilder.cpp +++ b/lib/patchestry/AST/TypeBuilder.cpp @@ -8,8 +8,8 @@ #include #include - #include + #include #include #include diff --git a/lib/patchestry/Dialect/Pcode/Deserialize.cpp b/lib/patchestry/Dialect/Pcode/Deserialize.cpp index 4a497bc3..6f868976 100644 --- 
a/lib/patchestry/Dialect/Pcode/Deserialize.cpp +++ b/lib/patchestry/Dialect/Pcode/Deserialize.cpp @@ -1,286 +1,72 @@ /* * Copyright (c) 2024, Trail of Bits, Inc. + * All rights reserved. * * This source code is licensed in accordance with the terms specified in * the LICENSE file found in the root directory of this source tree. */ -#include "patchestry/Util/Common.hpp" -#include -#include - #include -#include -#include #include -#include namespace patchestry::pc { - mlir_value - create_bitcast_op(mlir_builder &bld, mlir_value &input_val, mlir_type &output_type) { - return bld.create< mlir::arith::BitcastOp >( - bld.getUnknownLoc(), output_type, input_val - ); - } - - mlir_value - create_truc_op(mlir_builder &bld, mlir_value &input_val, mlir_type &output_type) { - return bld.create< mlir::arith::TruncIOp >(bld.getUnknownLoc(), output_type, input_val); - } - - std::optional< program > json_parser::parse_program(const llvm::json::Object &root) { - program program; - program.arch = root.getString("arch").value_or(""); - program.os = root.getString("os").value_or(""); - - if (const auto *function_array = root.getArray("functions")) { - for (const auto &function : *function_array) { - if (const auto *func_obj = function.getAsObject()) { - if (auto parsed_func = parse_function(*func_obj)) { - program.functions.push_back(*parsed_func); - } - } - } - } - - return program; - } - - std::optional< pcode > json_parser::parse_pcode(const llvm::json::Object &pcode_obj) { - pcode pcode; - pcode.mnemonic = pcode_obj.getString("mnemonic").value_or(""); - - if (const auto *output_obj = pcode_obj.getObject("output")) { - pcode.output.type = output_obj->getString("type").value_or(""); - pcode.output.offset = output_obj->getInteger("offset"); - pcode.output.size = output_obj->getInteger("size"); - } - - if (const auto *inputs_array = pcode_obj.getArray("inputs")) { - for (const auto &input : *inputs_array) { - if (const auto *input_obj = input.getAsObject()) { - pcode::input input; - 
input.type = input_obj->getString("type").value_or(""); - input.offset = input_obj->getInteger("offset"); - input.size = input_obj->getInteger("size"); - pcode.inputs.push_back(input); - } - } - } - - return pcode; - } - - std::optional< instruction > - json_parser::parse_instruction(const llvm::json::Object &inst_obj) { - instruction inst; - inst.mnemonic = inst_obj.getString("mnemonic").value_or(""); - inst.address = inst_obj.getString("address").value_or(""); - - if (const auto *pcode_array = inst_obj.getArray("pcode")) { - for (const auto &pcode : *pcode_array) { - if (const auto *pcode_obj = pcode.getAsObject()) { - if (auto parsed_pcode = parse_pcode(*pcode_obj)) { - inst.pcodes.push_back(*parsed_pcode); - } - } - } - } - - return inst; - } - - std::optional< basic_block > - json_parser::parse_basic_block(const llvm::json::Object &block_obj) { - basic_block block; - block.label = block_obj.getString("label").value_or(""); - - if (const auto *instructions_array = block_obj.getArray("instructions")) { - for (const auto &instruction : *instructions_array) { - if (const auto *inst_obj = instruction.getAsObject()) { - if (const auto parsed_inst = parse_instruction(*inst_obj)) { - block.instructions.push_back(*parsed_inst); - } - } - } - } - - return block; - } - - std::optional< function > json_parser::parse_function(const llvm::json::Object &func_obj) { - function func; - func.name = func_obj.getString("name").value_or(""); - - if (const auto *blocks_array = func_obj.getArray("basic_blocks")) { - for (const auto &block : *blocks_array) { - if (const auto *block_obj = block.getAsObject()) { - if (auto parsed_block = parse_basic_block(*block_obj)) { - func.basic_blocks.push_back(*parsed_block); - } - } - } - } - - return func; - } - mlir::OwningOpRef< mlir::ModuleOp > deserialize(const json_obj &json, mcontext_t *mctx) { // FIXME: use implicit module creation auto loc = mlir::UnknownLoc::get(mctx); auto mod = mlir::OwningOpRef< mlir::ModuleOp 
>(mlir::ModuleOp::create(loc)); deserializer des(mod.get()); - auto program = json_parser().parse_program(json); - if (program.has_value()) { - des.process(program.value()); - } else { - mlir::emitError(loc, "Failed to parse JSON object."); - } + des.process(json); return mod; } - mlir_operation deserializer::create_int_const(uint32_t offset, uint32_t size) { - auto const_type = mlir::IntegerType::get(bld.getContext(), size * 8); - auto const_attr = mlir::IntegerAttr::get(const_type, offset); - return bld.create< ConstOp >(bld.getUnknownLoc(), const_attr); - } - - mlir_operation - deserializer::create_varnode(std::string type, uint32_t offset, uint32_t size) { - auto varnode_type = varnode_from_string(type); - switch (varnode_type) { - case PCodeVarnodeType::unique_: { - auto mlir_type = bld.getType< VarType >(); - return bld.create< VarOp >(bld.getUnknownLoc(), mlir_type, type, offset, size); - } - case PCodeVarnodeType::const_: { - return bld.create< ConstOp >( - bld.getUnknownLoc(), - mlir::IntegerAttr::get( - mlir::IntegerType::get(bld.getContext(), size * 8), offset - ) - ); - } - case PCodeVarnodeType::register_: { - auto mlir_type = bld.getType< RegType >(); - auto int_type = bld.getI32Type(); - return bld.create< RegOp >(bld.getUnknownLoc(), int_type, type, offset, size); - } - case PCodeVarnodeType::ram_: { - auto mlir_type = bld.getType< MemType >(); - return bld.create< RegOp >(bld.getUnknownLoc(), mlir_type, type, offset, size); - } - default: - break; - } - return {}; - } - - void deserializer::process(const program &prog) { - if (prog.functions.empty()) { - mlir::emitError(bld.getUnknownLoc(), "No function to process!"); - return; - } - - for (const auto &func : prog.functions) { - process_function(func); - } + void deserializer::process(const json_obj &json) { + // FIXME: implement multi-function support + process_function(json); } - void deserializer::process_function(const function &func) { - if (func.name.empty()) { - 
mlir::emitError(bld.getUnknownLoc(), "Function name is missing."); + void deserializer::process_function(const json_obj &json) { + if (!json.getString("name")) { + mlir::emitError(bld.getUnknownLoc(), "Function JSON missing 'name' field."); return; } auto _ = insertion_guard(bld); - auto fn = bld.create< pc::FuncOp >(bld.getUnknownLoc(), func.name); + auto fn = bld.create< pc::FuncOp >(bld.getUnknownLoc(), json.getString("name").value()); bld.setInsertionPointToStart(bld.createBlock(&fn.getBlocks())); - for (const auto &block : func.basic_blocks) { - process_block(block); + if (auto blocks = json.getArray("basic_blocks")) { + for (const auto &block : *blocks) { + process_block(*block.getAsObject()); + } } } - void deserializer::process_block(const basic_block &block) { - if (block.label.empty()) { - mlir::emitError(bld.getUnknownLoc(), "Basic block is missing label name."); + void deserializer::process_block(const json_obj &json) { + if (!json.getString("label")) { + mlir::emitError(bld.getUnknownLoc(), "Block JSON missing 'label' field."); return; } - auto _ = insertion_guard(bld); - auto mlir_block = bld.create< pc::BlockOp >(bld.getUnknownLoc(), block.label); + auto _ = insertion_guard(bld); + auto block = + bld.create< pc::BlockOp >(bld.getUnknownLoc(), json.getString("label").value()); - bld.createBlock(&mlir_block.getInstructions()); - if (block.instructions.empty()) { - mlir::emitError(bld.getUnknownLoc(), "Block does not have instruction."); - return; - } + bld.createBlock(&block.getInstructions()); - for (const auto &inst : block.instructions) { - process_instruction(inst); - } - } - - void deserializer::process_instruction(const instruction &inst) { - if (inst.mnemonic.empty()) { - mlir::emitError(bld.getUnknownLoc(), "Instruction mnemonic is missing."); + const auto *insts = json.getArray("instructions"); + if (insts == nullptr) { + mlir::emitError(bld.getUnknownLoc(), "Block JSON missing 'instructions' field."); return; } - auto _ = 
insertion_guard(bld); - auto block = bld.create< pc::InstOp >(bld.getUnknownLoc(), inst.mnemonic); - - bld.createBlock(&block.getSemantics()); - if (inst.pcodes.empty()) { - mlir::emitError(bld.getUnknownLoc(), "Instruction has no pcode"); - return; - } - - for (const auto &pcode : inst.pcodes) { - process_pcode(pcode); + for (const auto &inst : *insts) { + process_instruction(*inst.getAsObject()); } } - void deserializer::process_pcode(const pcode &code) { - if (code.mnemonic.empty()) { - mlir::emitError(bld.getUnknownLoc(), "Pcode mnemonic is missing."); - return; - } - - switch (from_string(code.mnemonic)) { - case PCodeMnemonic::COPY: { - const auto &output = code.output; - const auto &input0 = code.inputs.front(); - - auto *output_op = - create_varnode(output.type, output.offset.value(), output.size.value()); - auto *input_op = - create_varnode(input0.type, input0.offset.value(), input0.size.value()); - - mlir::Type var_type = bld.getI32Type(); - mlir::Value var_result = - bld.create< VarOp >(bld.getUnknownLoc(), var_type, "input", 8, 8) - .getResult(); - bld.create< CopyOp >(bld.getUnknownLoc(), bld.getI32Type(), var_result); - break; - } - case PCodeMnemonic::LOAD: { - break; - } - case PCodeMnemonic::RETURN: { - const auto &input0 = code.inputs.front(); - auto *input_op = - create_varnode(input0.type, input0.offset.value(), input0.size.value()); - bld.create< ReturnOp >(bld.getUnknownLoc(), input_op->getResult(0)); - break; - } - default: - break; - } - } + void deserializer::process_instruction(const json_obj &json) {} } // namespace patchestry::pc diff --git a/lib/patchestry/Ghidra/PcodeTranslation.cpp b/lib/patchestry/Ghidra/PcodeTranslation.cpp index ab42848b..b5ca3845 100644 --- a/lib/patchestry/Ghidra/PcodeTranslation.cpp +++ b/lib/patchestry/Ghidra/PcodeTranslation.cpp @@ -7,8 +7,8 @@ #include -#include #include +#include #include @@ -19,19 +19,16 @@ #include #include -#include - namespace patchestry::ghidra { - static mlir::OwningOpRef< 
mlir_operation > deserialize( - const llvm::MemoryBuffer *buffer, mcontext_t *mctx - ) { + static mlir::OwningOpRef< mlir_operation > + deserialize(const llvm::MemoryBuffer *buffer, mcontext_t *mctx) { mctx->loadAllAvailableDialects(); - std::cout << buffer->getBuffer().str() << std::endl; auto json = llvm::json::parse(buffer->getBuffer()); if (!json) { - mlir::emitError(mlir::UnknownLoc::get(mctx), "failed to parse PCode JSON: ") << toString(json.takeError()); + mlir::emitError(mlir::UnknownLoc::get(mctx), "failed to parse PCode JSON: ") + << toString(json.takeError()); } return pc::deserialize(*json->getAsObject(), mctx); } @@ -39,11 +36,11 @@ namespace patchestry::ghidra { void register_pcode_translation() { mlir::TranslateToMLIRRegistration( "deserialize-pcode", "translate Ghidra Pcode JSON into Patchestry's Pcode dialect", - [] (llvm::SourceMgr &smgr, mcontext_t *mctx) { + [](llvm::SourceMgr &smgr, mcontext_t *mctx) { assert(smgr.getNumBuffers() == 1 && "expected one buffer"); return deserialize(smgr.getMemoryBuffer(smgr.getMainFileID()), mctx); }, - [] (mlir::DialectRegistry ®istry) { + [](mlir::DialectRegistry ®istry) { registry.insert< patchestry::pc::PcodeDialect >(); } ); diff --git a/tools/pcode-lifter/main.cpp b/tools/pcode-lifter/main.cpp index 6574275d..c0d98549 100644 --- a/tools/pcode-lifter/main.cpp +++ b/tools/pcode-lifter/main.cpp @@ -9,18 +9,15 @@ #include #include -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/TargetInfo.h" -#include "clang/Basic/TargetOptions.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/FrontendOptions.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/VirtualFileSystem.h" #include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include