diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index d193eed..87b7e4c 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -8,7 +8,6 @@ jobs: matrix: config: - {name: "Linux", os: ubuntu-latest, cmake-generator: ""} - - {name: "Windows", os: windows-latest, cmake-generator: "-G \"MinGW Makefiles\""} runs-on: ${{ matrix.config.os }} name: ${{ matrix.config.name }} @@ -17,10 +16,15 @@ jobs: - name: Requirements shell: bash run: | + pip install pillow cd .. - git clone https://github.com/tensorflow/tensorflow.git + git clone https://github.com/tensorflow/tflite-micro.git tensorflow cd tensorflow - make -f tensorflow/lite/micro/tools/make/Makefile hello_world + git checkout f474248365ad48654ba8a27ac5bf49a6afbb80e7 + git apply ../tflite_micro_compiler/patches/tflite-micro.patch + git apply ../tflite_micro_compiler/patches/tflite-micro-makefile.patch + make -Bf tensorflow/lite/micro/tools/make/Makefile NO_INTERPRETER=TRUE microlite -j2 + make -Bf tensorflow/lite/micro/tools/make/Makefile microlite -j2 - name: Requirements (Windows) if: matrix.config.os == 'windows-latest' run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index aec255e..5f72a87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,9 +21,9 @@ IF(NOT GET_TF_SRC) ${TFLMD_SRC}/ruy ) IF(WIN32) - SET(TF_LIB ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib/libtensorflow-microlite.a) + SET(TF_LIB ${TFLM_SRC}/tools/make/gen/windows_x86_64_default/lib/libtensorflow-microlite.a) ELSE() - SET(TF_LIB ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib/libtensorflow-microlite.a) + SET(TF_LIB ${TFLM_SRC}/tools/make/gen/linux_x86_64_default/lib/libtensorflow-microlite.a) ENDIF() ELSE() MESSAGE(FATAL_ERROR "\ @@ -45,7 +45,6 @@ SET(COMPILER_HEADERS ${PROJECT_SOURCE_DIR}/src/Compiler.h ${PROJECT_SOURCE_DIR}/src/CustomOperators.h ${PROJECT_SOURCE_DIR}/src/MemMap.h - ${PROJECT_SOURCE_DIR}/src/RecordAllocations.h ${PROJECT_SOURCE_DIR}/src/TypeToString.h ) @@ -54,7 +53,6 @@ 
SET(COMPILER_SRCS ${PROJECT_SOURCE_DIR}/src/Compiler.cc ${PROJECT_SOURCE_DIR}/src/CustomOperators.cc ${PROJECT_SOURCE_DIR}/src/MemMap.cc - ${PROJECT_SOURCE_DIR}/src/RecordAllocations.cc ${PROJECT_SOURCE_DIR}/src/TypeToString.cc ${PROJECT_SOURCE_DIR}/src/main.cc ) diff --git a/examples/generic_test/CMakeLists.txt b/examples/generic_test/CMakeLists.txt index 254e7d2..3159cbb 100644 --- a/examples/generic_test/CMakeLists.txt +++ b/examples/generic_test/CMakeLists.txt @@ -19,13 +19,18 @@ TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC ) IF(WIN32) - TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib) + TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64_default/lib) ELSE() - TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib) + TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64_default/lib) ENDIF() -TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC tensorflow-microlite) +TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC tensorflow-microlite-no-interpreter) TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC TF_LITE_STATIC_MEMORY TF_LITE_DISABLE_X86_NEON + NO_INTERPRETER +) + +TARGET_COMPILE_FEATURES(${PROJECT_NAME} PUBLIC + cxx_std_14 ) diff --git a/patches/tflite-micro-makefile.patch b/patches/tflite-micro-makefile.patch new file mode 100644 index 0000000..1d6fcc2 --- /dev/null +++ b/patches/tflite-micro-makefile.patch @@ -0,0 +1,57 @@ +diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile +index e25b3bc..09ce481 100644 +--- a/tensorflow/lite/micro/tools/make/Makefile ++++ b/tensorflow/lite/micro/tools/make/Makefile +@@ -150,6 +150,10 @@ COMMON_FLAGS := \ + $(CC_WARNINGS) \ + $(ADDITIONAL_DEFINES) + ++ifeq ($(NO_INTERPRETER),TRUE) ++ COMMON_FLAGS += -DNO_INTERPRETER ++endif ++ + ifeq ($(TARGET), $(HOST_OS)) + # If we are not doing a cross-compilation then -DTF_LITE_USE_CTIME is what 
we + # want to have by default. +@@ -233,7 +237,11 @@ endif + + # This library is the main target for this makefile. It will contain a minimal + # runtime that can be linked in to other programs. +-MICROLITE_LIB_NAME := libtensorflow-microlite.a ++ifeq ($(NO_INTERPRETER),TRUE) ++ MICROLITE_LIB_NAME := libtensorflow-microlite-no-interpreter.a ++else ++ MICROLITE_LIB_NAME := libtensorflow-microlite.a ++endif + + # Where compiled objects are stored. + GENDIR := $(MAKEFILE_DIR)/gen/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE)/ +@@ -469,6 +477,28 @@ THIRD_PARTY_KERNEL_CC_SRCS := + # Load custom kernels. + include $(MAKEFILE_DIR)/additional_kernels.inc + ++ifeq ($(NO_INTERPRETER),TRUE) ++ MICRO_LITE_NO_INTERPRETER_SRCS := \ ++ tensorflow/lite/micro/fake_micro_context.cc \ ++ tensorflow/lite/micro/micro_context.cc \ ++ tensorflow/lite/micro/micro_allocator.cc \ ++ tensorflow/lite/micro/micro_graph.cc \ ++ tensorflow/lite/micro/micro_interpreter.cc ++ ++ MICRO_LITE_UNSUPPORTED_KERNELS_SRCS := \ ++ tensorflow/lite/micro/kernels/assign_variable.cc \ ++ tensorflow/lite/micro/kernels/call_once.cc \ ++ tensorflow/lite/micro/kernels/if.cc \ ++ tensorflow/lite/micro/kernels/kernel_runner.cc \ ++ tensorflow/lite/micro/kernels/read_variable.cc \ ++ tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc \ ++ tensorflow/lite/micro/kernels/while.cc \ ++ tensorflow/lite/micro/kernels/var_handle.cc ++ ++ MICROLITE_CC_KERNEL_SRCS := $(filter-out $(MICRO_LITE_UNSUPPORTED_KERNELS_SRCS), $(MICROLITE_CC_KERNEL_SRCS)) ++ MICROLITE_CC_BASE_SRCS := $(filter-out $(MICRO_LITE_NO_INTERPRETER_SRCS), $(MICROLITE_CC_BASE_SRCS)) ++endif ++ + MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SRCS)) + MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_BENCHMARK_SRCS), $(MICROLITE_CC_SRCS)) + diff --git a/patches/tflite-micro.patch b/patches/tflite-micro.patch new file mode 100644 index 0000000..0058678 --- /dev/null +++ b/patches/tflite-micro.patch @@ -0,0 +1,52 @@ +diff --git 
a/tensorflow/lite/micro/micro_context.h b/tensorflow/lite/micro/micro_context.h +index e7be654..2693ff2 100644 +--- a/tensorflow/lite/micro/micro_context.h ++++ b/tensorflow/lite/micro/micro_context.h +@@ -20,6 +20,22 @@ limitations under the License. + #include "tensorflow/lite/micro/micro_allocator.h" + #include "tensorflow/lite/micro/micro_graph.h" + ++#ifdef NO_INTERPRETER ++ ++namespace tflite { ++ struct MicroContext{ ++ TfLiteTensor* (*AllocateTempInputTensor)(const TfLiteNode* node, int index); ++ TfLiteTensor* (*AllocateTempOutputTensor)(const TfLiteNode* node, int index); ++ void (*DeallocateTempTfLiteTensor)(TfLiteTensor* tensor); ++ void* (*external_context)(); ++ }; ++ static inline MicroContext* GetMicroContext(const struct TfLiteContext* context){ ++ return reinterpret_cast<MicroContext*>(context->impl_); ++ } ++} ++ ++#else ++ + namespace tflite { + // MicroContext is eventually going to become the API between TFLM and the + // kernels, replacing all the functions in TfLiteContext. The end state is code +@@ -158,4 +174,6 @@ void MicroContextReportOpError(struct TfLiteContext* context, + + } // namespace tflite + ++#endif // NO_INTERPRETER ++ + #endif // TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ +diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h +index ae7fc8f..5c2fd6e 100644 +--- a/tensorflow/lite/micro/micro_interpreter.h ++++ b/tensorflow/lite/micro/micro_interpreter.h +@@ -140,6 +140,13 @@ class MicroInterpreter { + // arena_used_bytes() + 16. + size_t arena_used_bytes() const { return allocator_.used_bytes(); } + ++ size_t operators_size() const { return model_->subgraphs()->Get(0)->operators()->size(); } ++ ++ // For debugging only.
++ const NodeAndRegistration node_and_registration(int node_index) { ++ return graph_.GetAllocations()[0].node_and_registrations[node_index]; ++ } ++ + protected: + const MicroAllocator& allocator() const { return allocator_; } + const TfLiteContext& context() const { return context_; } diff --git a/src/CodeWriter.cc b/src/CodeWriter.cc index 564c0b1..1a84665 100644 --- a/src/CodeWriter.cc +++ b/src/CodeWriter.cc @@ -114,11 +114,25 @@ void tflmc::CodeWriter::writeBuiltin(tflite::BuiltinOperator op, TfLiteAddParams const* p = (TfLiteAddParams const*)data; out_ << to_string(p->activation) << " };"; } break; + case tflite::BuiltinOperator_MEAN: { + out_ << "TfLiteReducerParams " << name << " = { "; + TfLiteReducerParams const* p = (TfLiteReducerParams const*)data; + out_ << p->keep_dims << " };"; + } break; case tflite::BuiltinOperator_MUL: { out_ << "TfLiteMulParams " << name << " = { "; TfLiteMulParams const* p = (TfLiteMulParams const*)data; out_ << to_string(p->activation) << " };"; } break; + case tflite::BuiltinOperator_PACK: { + out_ << "TfLitePackParams " << name << " = { "; + TfLitePackParams const* p = (TfLitePackParams const*)data; + out_ << p->values_count << ", " << p->axis << " };"; + } break; + case tflite::BuiltinOperator_SHAPE: { + out_ << "TfLiteShapeParams " << name << " = { " + << " };"; + } break; case tflite::BuiltinOperator_SUB: { out_ << "TfLiteSubParams " << name << " = { "; TfLiteSubParams const* p = (TfLiteSubParams const*)data; @@ -130,6 +144,19 @@ void tflmc::CodeWriter::writeBuiltin(tflite::BuiltinOperator op, (TfLiteConcatenationParams const*)data; out_ << p->axis << ", " << to_string(p->activation) << " };"; } break; + case tflite::BuiltinOperator_STRIDED_SLICE: { + out_ << "TfLiteStridedSliceParams " << name << " = { "; + TfLiteStridedSliceParams const* p = (TfLiteStridedSliceParams const*)data; + out_ << p->begin_mask << ", " << p->end_mask << ", " << p->ellipsis_mask + << ", " << p->new_axis_mask << ", " << p->shrink_axis_mask << " 
};"; + } break; + case tflite::BuiltinOperator_TRANSPOSE_CONV: { + out_ << "TfLiteTransposeConvParams " << name << " = { "; + TfLiteTransposeConvParams const* p = + (TfLiteTransposeConvParams const*)data; + out_ << to_string(p->padding) << ", " << p->stride_width << ", " + << p->stride_height << " };"; + } break; default: { size_t datalen = GetBuiltinDataSize(op, subgraph_); uint32_t alignment = datalen >= 4 ? 4 : datalen >= 2 ? 2 : 1; @@ -224,8 +251,7 @@ static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, int outer_dim = t.dims->data[0]; int middle_dim = t.dims->data[t.dims->size - 2]; int inner_dim = t.dims->data[t.dims->size - 1]; - for (int i = 1; i < t.dims->size - 2; ++i) - outer_dim *= t.dims->data[i]; + for (int i = 1; i < t.dims->size - 2; ++i) outer_dim *= t.dims->data[i]; for (int i = 0; i < outer_dim; ++i) { // out_ << "\n "; // uint32_t outer_index = inner_dim * middle_dim; diff --git a/src/CodeWriter.h b/src/CodeWriter.h index 18a659b..e8c69b7 100644 --- a/src/CodeWriter.h +++ b/src/CodeWriter.h @@ -4,7 +4,6 @@ #include #include "tensorflow/lite/micro/micro_interpreter.h" -#include "tensorflow/lite/version.h" namespace tflmc { diff --git a/src/Compiler.cc b/src/Compiler.cc index 7e5ffeb..bfdf0b4 100644 --- a/src/Compiler.cc +++ b/src/Compiler.cc @@ -1,19 +1,17 @@ - #include "Compiler.h" -#include #include +#include #include +#include #include #include "CodeWriter.h" #include "CustomOperators.h" -#include "RecordAllocations.h" #include "TypeToString.h" -#include "tensorflow/lite/version.h" #ifndef SUFFICIENT_ARENA_SIZE -#define SUFFICIENT_ARENA_SIZE (128*1024*1024) +#define SUFFICIENT_ARENA_SIZE (128 * 1024 * 1024) #endif #if TF_LITE_PACKED_QUANTIZED_DATA_VERSION @@ -22,6 +20,133 @@ #endif #endif +static std::vector g_loggedAllocations; +static int g_currentNodeIndex = -1; +static uint8_t *g_arenaPtr = nullptr; +static ptrdiff_t g_arena_size = 0; + +static void *LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx, + size_t 
bytes) { + void *ptr = tflite::GetMicroContext(ctx)->AllocatePersistentBuffer(bytes); + assert(ptr != nullptr && "Alloc failure"); + g_loggedAllocations.push_back({-(g_arenaPtr - (uint8_t *)ptr + g_arena_size), + bytes, g_currentNodeIndex}); + return ptr; +} + +TfLiteStatus tflmc::AllocateTensors( + std::unique_ptr<tflite::MicroInterpreter> &interpreter) { + tflite::SubgraphAllocations *allocations = + interpreter->allocator_.StartModelAllocation(interpreter->model_); + + if (allocations == nullptr) { + TF_LITE_REPORT_ERROR(interpreter->error_reporter_, + "Failed starting model allocation.\n"); + interpreter->initialization_status_ = kTfLiteError; + return kTfLiteError; + } + + interpreter->graph_.SetSubgraphAllocations(allocations); + + TF_LITE_ENSURE_STATUS( + interpreter->PrepareNodeAndRegistrationDataFromFlatbuffer()); + + // Only allow AllocatePersistentBuffer in Init stage. + interpreter->context_.AllocatePersistentBuffer = + &LoggingAllocatePersistentBuffer; + interpreter->context_.RequestScratchBufferInArena = nullptr; + interpreter->context_.GetScratchBuffer = nullptr; + interpreter->context_.GetExternalContext = nullptr; + TF_LITE_ENSURE_STATUS(interpreter->graph_.InitSubgraphs()); + + // Both AllocatePersistentBuffer and RequestScratchBufferInArena are + // available in Prepare stage. + interpreter->context_.RequestScratchBufferInArena = + tflite::MicroContextRequestScratchBufferInArena; + // external_context becomes available in Prepare stage. + interpreter->context_.GetExternalContext = + tflite::MicroContextGetExternalContext; + + TF_LITE_ENSURE_STATUS(interpreter->graph_.PrepareSubgraphs()); + + // Prepare is done, we're ready for Invoke. Memory allocation is no longer + // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
+ interpreter->context_.AllocatePersistentBuffer = nullptr; + interpreter->context_.RequestScratchBufferInArena = nullptr; + interpreter->context_.GetScratchBuffer = tflite::MicroContextGetScratchBuffer; + + TF_LITE_ENSURE_OK( + &interpreter->context_, + interpreter->allocator_.FinishModelAllocation( + interpreter->model_, interpreter->graph_.GetAllocations(), + &interpreter->scratch_buffer_handles_)); + + interpreter->micro_context_.SetScratchBufferHandles( + interpreter->scratch_buffer_handles_); + + // TODO(b/162311891): Drop these allocations when the interpreter supports + // handling buffers from TfLiteEvalTensor. + interpreter->input_tensors_ = reinterpret_cast<TfLiteTensor **>( + interpreter->allocator_.AllocatePersistentBuffer( + sizeof(TfLiteTensor *) * interpreter->inputs_size())); + if (interpreter->input_tensors_ == nullptr) { + TF_LITE_REPORT_ERROR( + interpreter->error_reporter_, + "Failed to allocate memory for context->input_tensors_, " + "%d bytes required", + sizeof(TfLiteTensor *) * interpreter->inputs_size()); + return kTfLiteError; + } + + for (size_t i = 0; i < interpreter->inputs_size(); ++i) { + interpreter->input_tensors_[i] = + interpreter->allocator_.AllocatePersistentTfLiteTensor( + interpreter->model_, interpreter->graph_.GetAllocations(), + interpreter->inputs().Get(i), 0); + if (interpreter->input_tensors_[i] == nullptr) { + TF_LITE_REPORT_ERROR(interpreter->error_reporter_, + "Failed to initialize input tensor %d", i); + return kTfLiteError; + } + } + + // TODO(b/162311891): Drop these allocations when the interpreter supports + // handling buffers from TfLiteEvalTensor.
+ interpreter->output_tensors_ = reinterpret_cast<TfLiteTensor **>( + interpreter->allocator_.AllocatePersistentBuffer( + sizeof(TfLiteTensor *) * interpreter->outputs_size())); + if (interpreter->output_tensors_ == nullptr) { + TF_LITE_REPORT_ERROR( + interpreter->error_reporter_, + "Failed to allocate memory for context->output_tensors_, " + "%d bytes required", + sizeof(TfLiteTensor *) * interpreter->outputs_size()); + return kTfLiteError; + } + + for (size_t i = 0; i < interpreter->outputs_size(); ++i) { + interpreter->output_tensors_[i] = + interpreter->allocator_.AllocatePersistentTfLiteTensor( + interpreter->model_, interpreter->graph_.GetAllocations(), + interpreter->outputs().Get(i), 0); + if (interpreter->output_tensors_[i] == nullptr) { + TF_LITE_REPORT_ERROR(interpreter->error_reporter_, + "Failed to initialize output tensor %d", i); + return kTfLiteError; + } + } + + TF_LITE_ENSURE_STATUS(interpreter->ResetVariableTensors()); + + interpreter->tensors_allocated_ = true; + return kTfLiteOk; +} + +TfLiteTensor *tflmc::GetTensor(tflite::MicroInterpreter *interpreter, int i) { + auto ctx = &interpreter->context_; + return ctx->GetTensor(ctx, i); +} + bool tflmc::CompileFile(const std::string &modelFileName, const std::string &outFileName, const std::string &prefix) { @@ -108,13 +233,19 @@ bool tflmc::Compiler::init(const void *modelData) { // Build an interpreter to run the model with. arena_buf_.resize(SUFFICIENT_ARENA_SIZE); + + g_arena_size = SUFFICIENT_ARENA_SIZE; + std::vector<uint8_t> arena_buf(g_arena_size); + g_arenaPtr = arena_buf_.data(); + interpreter_ = std::unique_ptr<tflite::MicroInterpreter>( - new tflite::MicroInterpreter( - model_, resolver_, arena_buf_.data(), arena_buf_.size(), - &microErrReporter_)); + new tflite::MicroInterpreter(model_, resolver_, arena_buf_.data(), + arena_buf_.size(), &microErrReporter_)); + + assert(interpreter_->graph_.NumSubgraphs() == 1); // Allocate memory from the tensor_arena for the model's tensors.
- TfLiteStatus allocate_status = interpreter_->AllocateTensors(); + TfLiteStatus allocate_status = AllocateTensors(interpreter_); if (allocate_status != kTfLiteOk) { errReporter().Report("AllocateTensors() failed"); return false; @@ -155,6 +286,21 @@ bool tflmc::Compiler::init(const void *modelData) { } } + for (size_t k = 0; k < interpreter_->allocator_.scratch_buffer_request_count_; + k++) { + void *data = interpreter_->micro_context_.GetScratchBuffer(k); + ptrdiff_t offset = (uint8_t *)data - arena_buf_.data(); + tflite::internal::ScratchBufferRequest *requests = + interpreter_->allocator_.GetScratchBufferRequests(); + int bytes = requests[k].bytes; + ptrdiff_t highSize = offset + bytes; + ramTensorBufferSize = std::max(ramTensorBufferSize, highSize); + memMap_.recordRAM(offset, bytes, + "Scratch_idx" + std::to_string((int)k) + "_op" + + std::to_string((int)requests[k].node_idx)); + scratchBufferOffsets.push_back(offset); + } + for (size_t i = 0; i < interpreter_->operators_size(); i++) { auto nodeAndReg = interpreter_->node_and_registration(i); auto node = &nodeAndReg.node; @@ -168,6 +314,9 @@ bool tflmc::Compiler::init(const void *modelData) { regInfo.code = code; if (code == tflite::BuiltinOperator_CUSTOM) { regInfo.custom_name = reg->custom_name; + if (regInfo.custom_name == "TFLite_Detection_PostProcess") { + has_tflite_custom_ops = true; + } has_custom_ops = true; } auto itOp = @@ -180,13 +329,12 @@ bool tflmc::Compiler::init(const void *modelData) { nodes_.push_back(NodeInfo{*node, itOp - registrations_.begin()}); } - auto runtimeAllocations = tflmc::RecordAllocations(model_, SUFFICIENT_ARENA_SIZE); ptrdiff_t minRuntimeOffset = 0; // These are negative so zero start is fine. 
- for (const auto &alloc : runtimeAllocations) { + for (const auto &alloc : g_loggedAllocations) { minRuntimeOffset = std::min(minRuntimeOffset, alloc.offset); } size_t totalRuntimeAllocSize = 0; - for (const auto &alloc : runtimeAllocations) { + for (const auto &alloc : g_loggedAllocations) { // TODO: This drops the alignment between buffers. Is this fine? totalRuntimeAllocSize += alloc.len; ptrdiff_t offset = alloc.offset - minRuntimeOffset + ramTensorBufferSize; @@ -220,9 +368,15 @@ void tflmc::Compiler::writeSource(std::ostream &out) { CodeWriter wr(out, subgraph_); wr << R"( + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/conv.h" +#include "tensorflow/lite/micro/kernels/fully_connected.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" +#include "tensorflow/lite/micro/kernels/reduce.h" +#include "tensorflow/lite/micro/kernels/softmax.h" +#include "tensorflow/lite/micro/micro_context.h" #if defined __GNUC__ #define ALIGN(X) __attribute__((aligned(X))) @@ -240,8 +394,9 @@ namespace ops { namespace micro { )"; for (size_t i = 0; i < registrations_.size(); i++) { - if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM) { - wr << "extern TfLiteRegistration Register_" + if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM && + registrations_[i].custom_name != "TFLite_Detection_PostProcess") { + wr << "extern TfLiteRegistration *Register_" << registrations_[i].custom_name << "(void);\n"; } } @@ -249,6 +404,20 @@ namespace micro { } // namespace ops } // namespace tflite +)"; + } + if (has_tflite_custom_ops) { + wr << R"(namespace tflite { +)"; + for (size_t i = 0; i < registrations_.size(); i++) { + if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM && + registrations_[i].custom_name == "TFLite_Detection_PostProcess") { + wr << "extern TfLiteRegistration " + "*Register_DETECTION_POSTPROCESS(void);\n"; + } + } + wr << R"(} // namespace tflite + )"; } wr << 
R"(namespace { @@ -316,7 +485,8 @@ TfLiteNode tflNodes[)" wr.writeIntArray(*t->dims, "tensor_dimension" + std::to_string(i)); wr.writeQuantization(t->quantization, "quant" + std::to_string(i)); #if TF_LITE_PACKED_QUANTIZED_DATA_VERSION - wr.writeQuantizationDetails(t->quantization, "quant_details" + std::to_string(i)); + wr.writeQuantizationDetails(t->quantization, + "quant_details" + std::to_string(i)); #endif } for (size_t i = 0; i < nodes_.size(); i++) { @@ -409,6 +579,21 @@ TfLiteNode tflNodes[)" // TODO: This code assumes that persistent allocations are made from the end // (which is true for the current implementation) wr << R"( + +// Scratch buffer variables +int scratch_buffer_idx = 0; +const int scratch_buffer_offsets[)" + << scratchBufferOffsets.size() << R"(] = { )"; + if (scratchBufferOffsets.size() > 0) { + wr << scratchBufferOffsets[0]; + for (int i = 1; i < scratchBufferOffsets.size(); i++) { + wr << ", " << scratchBufferOffsets[i]; + } + } + wr << R"( }; +tflite::MicroContext mc; + +// Functions to be used as function pointers for TfLiteContext and MicroContext static void* AllocatePersistentBuffer(struct TfLiteContext* ctx, size_t bytes) { static uint8_t *AllocPtr = tensor_arena + sizeof(tensor_arena); @@ -421,12 +606,51 @@ static TfLiteEvalTensor *GetEvalTensor(const struct TfLiteContext *context, int tensor_idx) { return &evalTensors[tensor_idx]; } + +static TfLiteStatus RequestScratchBufferInArena(struct TfLiteContext *context, size_t bytes, + int *buffer_idx) { + *buffer_idx = scratch_buffer_idx++; + return kTfLiteOk; +}; + +static void *GetScratchBuffer(struct TfLiteContext *context, + int buffer_idx) { + return tensor_arena + scratch_buffer_offsets[buffer_idx]; +} + +static TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node, int index) { + return &ctx.tensors[node->inputs->data[index]]; +} + +static TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node, int index) { + return &ctx.tensors[node->outputs->data[index]]; +} + 
+static void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { +} + +static void* external_context() { + return nullptr; +} + } // namespace TfLiteStatus )" << prefix_ << R"(init() { + // Setup microcontext functions + mc.AllocateTempInputTensor = &AllocateTempInputTensor; + mc.AllocateTempOutputTensor = &AllocateTempOutputTensor; + mc.DeallocateTempTfLiteTensor = &DeallocateTempTfLiteTensor; + mc.external_context = &external_context; + + // Setup tflitecontext functions ctx.AllocatePersistentBuffer = &AllocatePersistentBuffer; ctx.GetEvalTensor = &GetEvalTensor; + ctx.RequestScratchBufferInArena = &RequestScratchBufferInArena; + ctx.GetScratchBuffer = &GetScratchBuffer; + + // Set microcontext as the context ptr + ctx.impl_ = (void*)&mc; ctx.tensors = tflTensors; )"; wr << " ctx.tensors_size = " << tensors_.size() << ";\n"; @@ -474,11 +698,39 @@ TfLiteStatus )" std::string opName; if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM) { opName = registrations_[i].custom_name; + if (opName == "TFLite_Detection_PostProcess") { + wr << " registrations[OP_" << opName + << "] = *(tflite::Register_DETECTION_POSTPROCESS());\n"; + } + } else if ((registrations_[i].code == tflite::BuiltinOperator_ADD) || + (registrations_[i].code == + tflite::BuiltinOperator_AVERAGE_POOL_2D) || + (registrations_[i].code == tflite::BuiltinOperator_CONV_2D) || + (registrations_[i].code == + tflite::BuiltinOperator_DEPTHWISE_CONV_2D) || + (registrations_[i].code == tflite::BuiltinOperator_DEQUANTIZE) || + (registrations_[i].code == + tflite::BuiltinOperator_FULLY_CONNECTED) || + (registrations_[i].code == tflite::BuiltinOperator_LOGISTIC) || + (registrations_[i].code == + tflite::BuiltinOperator_MAX_POOL_2D) || + (registrations_[i].code == tflite::BuiltinOperator_MEAN) || + (registrations_[i].code == tflite::BuiltinOperator_MUL) || + (registrations_[i].code == tflite::BuiltinOperator_PRELU) || + (registrations_[i].code == tflite::BuiltinOperator_QUANTIZE) || + (registrations_[i].code 
== tflite::BuiltinOperator_RELU) || + (registrations_[i].code == tflite::BuiltinOperator_SHAPE) || + (registrations_[i].code == tflite::BuiltinOperator_SOFTMAX) || + (registrations_[i].code == + tflite::BuiltinOperator_TRANSPOSE_CONV)) { + opName = tflite::EnumNameBuiltinOperator(registrations_[i].code); + wr << " registrations[OP_" << opName << "] = tflite::Register_" << opName + << "();\n"; } else { opName = tflite::EnumNameBuiltinOperator(registrations_[i].code); + wr << " registrations[OP_" << opName + << "] = tflite::ops::micro::Register_" << opName << "();\n"; } - wr << " registrations[OP_" << opName << "] = tflite::ops::micro::Register_" - << opName << "();\n"; } wr << "\n"; wr << " for(size_t i = 0; i < " << nodes_.size() << R"(; ++i) { diff --git a/src/Compiler.h b/src/Compiler.h index 3a69b27..6e5a21d 100644 --- a/src/Compiler.h +++ b/src/Compiler.h @@ -1,16 +1,26 @@ #ifndef TFLMCOMPILER_COMPILER_H #define TFLMCOMPILER_COMPILER_H -#include - #include "MemMap.h" #include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/micro_error_reporter.h" +#define private public #include "tensorflow/lite/micro/micro_interpreter.h" +#undef private #include "tensorflow/lite/schema/schema_generated.h" namespace tflmc { +struct Allocation { + ptrdiff_t offset; + size_t len; + int nodeIndex; +}; + +TfLiteStatus AllocateTensors( + std::unique_ptr &interpreter); +TfLiteTensor *GetTensor(tflite::MicroInterpreter *interpreter, int i); + bool CompileFile(const std::string &modelFileName, const std::string &outFileName, const std::string &prefix = "model_"); @@ -33,9 +43,7 @@ class Compiler { private: struct TensorInfo { - TensorInfo(const TfLiteTensor *tensor_ptr) : - tensor(tensor_ptr) - {} + TensorInfo(const TfLiteTensor *tensor_ptr) : tensor(tensor_ptr) {} const TfLiteTensor *tensor = nullptr; }; struct RegistrationInfo { @@ -52,10 +60,8 @@ class Compiler { }; struct NodeInfo { NodeInfo() {} - NodeInfo(TfLiteNode tfl_node, ptrdiff_t reg_index) : - 
node(tfl_node), - regIndex(reg_index) - {} + NodeInfo(TfLiteNode tfl_node, ptrdiff_t reg_index) + : node(tfl_node), regIndex(reg_index) {} TfLiteNode node; ptrdiff_t regIndex = -1; }; @@ -89,8 +95,10 @@ class Compiler { std::vector nodes_; std::vector inputTensorIndices_; std::vector outputTensorIndices_; + std::vector scratchBufferOffsets; bool has_custom_ops = false; + bool has_tflite_custom_ops = false; bool has_quantization = false; Option common_tensor_type; Option common_tensor_is_variable; diff --git a/src/MemMap.h b/src/MemMap.h index 55295b1..61258de 100644 --- a/src/MemMap.h +++ b/src/MemMap.h @@ -1,9 +1,9 @@ #ifndef TFLMCOMPILER_MEMMAP_H #define TFLMCOMPILER_MEMMAP_H +#include #include #include -#include namespace tflmc { diff --git a/src/RecordAllocations.cc b/src/RecordAllocations.cc deleted file mode 100644 index dc7b3ce..0000000 --- a/src/RecordAllocations.cc +++ /dev/null @@ -1,92 +0,0 @@ -#include -#define private public -#include "tensorflow/lite/micro/micro_interpreter.h" -#undef private - -#include "CustomOperators.h" -#include "RecordAllocations.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -static std::vector g_loggedAllocations; -static tflite::MicroAllocator *g_allocator; -static int g_currentNodeIndex = -1; -static uint8_t *g_arenaPtr = nullptr; - -static ptrdiff_t g_arena_size = 0; - -static void* LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx, - size_t bytes) { - void* ptr = g_allocator->AllocatePersistentBuffer(bytes); - assert(ptr!=nullptr && "Alloc failure"); - g_loggedAllocations.push_back( - {-(g_arenaPtr - (uint8_t *)ptr + g_arena_size), bytes, - g_currentNodeIndex}); - return ptr; -} -static TfLiteStatus LoggingRequestScratchBufferInArena(TfLiteContext *ctx, - size_t bytes, - int *buffer_idx) { - assert(false && "Not handling scratch buffers currently"); - return g_allocator->RequestScratchBufferInArena(g_currentNodeIndex, bytes, - buffer_idx); -} - 
-std::vector tflmc::RecordAllocations( - const tflite::Model *model, ptrdiff_t arena_size) { - g_arena_size = arena_size; - std::vector arena_buf(g_arena_size); - g_arenaPtr = arena_buf.data(); - - tflite::MicroErrorReporter error_reporter; - tflite::AllOpsResolver resolver; - tflmc::custom_operator_handle custom = tflmc::LoadCustom(&resolver); - tflite::MicroInterpreter interpreter(model, resolver, arena_buf.data(), - g_arena_size, &error_reporter); - - auto ctx = &interpreter.context_; - auto allocator = &interpreter.allocator_; - - tflite::NodeAndRegistration *nodeAndRegs; - TfLiteEvalTensor *eval_tensors=nullptr; - allocator->StartModelAllocation(model, resolver, &nodeAndRegs, &eval_tensors); - allocator->FinishModelAllocation(model, eval_tensors); - - g_allocator = allocator; - ctx->AllocatePersistentBuffer = &LoggingAllocatePersistentBuffer; - ctx->RequestScratchBufferInArena = nullptr; - ctx->GetScratchBuffer = nullptr; - - auto subgraph = model->subgraphs()->Get(0); - for (size_t i = 0; i < subgraph->operators()->size(); i++) { - auto node = &nodeAndRegs[i].node; - auto reg = nodeAndRegs[i].registration; - if (reg->init) { - g_currentNodeIndex = i; - node->user_data = reg->init(ctx, (const char *)node->builtin_data, 0); - } - } - - ctx->RequestScratchBufferInArena = &LoggingRequestScratchBufferInArena; - - for (size_t i = 0; i < subgraph->operators()->size(); i++) { - auto node = &nodeAndRegs[i].node; - auto reg = nodeAndRegs[i].registration; - if (reg->prepare) { - g_currentNodeIndex = i; - reg->prepare(ctx, node); - } - } - tflmc::UnloadCustom(custom); - return g_loggedAllocations; -} - -TfLiteEvalTensor *tflmc::GetEvalTensor(tflite::MicroInterpreter *interpreter, int i) { - auto ctx = &interpreter->context_; - return ctx->GetEvalTensor(ctx, i); -} - -TfLiteTensor *tflmc::GetTensor(tflite::MicroInterpreter *interpreter, int i) { - auto ctx = &interpreter->context_; - return ctx->GetTensor(ctx, i); -} diff --git a/src/RecordAllocations.h 
b/src/RecordAllocations.h deleted file mode 100644 index a8848ac..0000000 --- a/src/RecordAllocations.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef TFLMCOMPILER_RECORDALLOCATIONS_H -#define TFLMCOMPILER_RECORDALLOCATIONS_H - -#include "tensorflow/lite/schema/schema_generated.h" -#include - -namespace tflmc { - -struct Allocation { - ptrdiff_t offset; - size_t len; - int nodeIndex; -}; - -std::vector RecordAllocations(const tflite::Model *model, ptrdiff_t arena_size); - - -TfLiteEvalTensor *GetEvalTensor(tflite::MicroInterpreter *interpreter, int i); -TfLiteTensor *GetTensor(tflite::MicroInterpreter *interpreter, int i); - -} // namespace tflmc - -#endif diff --git a/src/TypeToString.cc b/src/TypeToString.cc index 2747904..57a4f8c 100644 --- a/src/TypeToString.cc +++ b/src/TypeToString.cc @@ -67,7 +67,7 @@ std::string tflmc::to_string(TfLiteFusedActivation t) { switch (t) { NAME(kTfLiteActNone); NAME(kTfLiteActRelu); - NAME(kTfLiteActRelu1); + NAME(kTfLiteActReluN1To1); NAME(kTfLiteActRelu6); NAME(kTfLiteActTanh); NAME(kTfLiteActSignBit);