From 23c4d46790c4de576555eee5fed73b3692280405 Mon Sep 17 00:00:00 2001 From: Andrew Stevens Date: Tue, 11 Aug 2020 11:44:22 +0200 Subject: [PATCH 01/14] Over squashed 100 IFX Commits (stripped down to essentials) --- .vscode/c_cpp_properties.json | 22 - .vscode/launch.json | 65 -- .vscode/tasks.json | 34 - CMakeLists.txt | 29 +- Makefile | 27 +- README.md | 10 +- common.mk | 22 +- src/BuiltinAllocations.cc | 136 +++ src/BuiltinAllocations.h | 20 + src/CodeWriter.cc | 278 +++---- src/CodeWriter.h | 61 +- src/Compiler.cc | 1051 +++++++++++++++++++----- src/Compiler.h | 111 +-- src/CustomOperators.cc | 19 +- src/CustomOperators.h | 2 +- src/Makefile.inc | 9 +- src/MemMap.cc | 196 ++++- src/MemMap.h | 123 ++- src/ModelInfo.h | 52 ++ src/Options.h | 23 + src/RecordAllocations.cc | 185 ++++- src/RecordAllocations.h | 40 +- src/TypeToString.cc | 2 +- src/main.cc | 75 +- tflite_u_preint/static_data_utils.cc | 99 +++ tflite_u_preint/static_data_utils.h | 41 + tflite_u_preint/static_init_support.cc | 430 ++++++++++ tflite_u_preint/static_init_support.h | 528 ++++++++++++ 28 files changed, 3042 insertions(+), 648 deletions(-) delete mode 100644 .vscode/c_cpp_properties.json delete mode 100644 .vscode/launch.json delete mode 100644 .vscode/tasks.json create mode 100644 src/BuiltinAllocations.cc create mode 100644 src/BuiltinAllocations.h create mode 100644 src/ModelInfo.h create mode 100644 src/Options.h create mode 100644 tflite_u_preint/static_data_utils.cc create mode 100644 tflite_u_preint/static_data_utils.h create mode 100644 tflite_u_preint/static_init_support.cc create mode 100644 tflite_u_preint/static_init_support.h diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json deleted file mode 100644 index 8b32d41..0000000 --- a/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "configurations": [ - { - "name": "Linux", - "includePath": [ - "${workspaceFolder}/**", - "${workspaceFolder}/../tensorflow", - "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/", - "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/gemmlowp", - "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include", - "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/ruy", - "${workspaceFolder}/../tensorflow/tensorflow/lite/micro/tools/make/downloads/kissfft" - ], - "defines": [ "TF_LITE_STATIC_MEMORY", "NDEBUG", "TF_LITE_DISABLE_X86_NEON", "SUFFICIENT_ARENA_SIZE" ], - "compilerPath": "/usr/bin/g++", - "cStandard": "c11", - "cppStandard": "c++17", - "intelliSenseMode": "clang-x64" - } - ], - "version": 4 -} diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index df88327..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. 
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "(gdb) hello example Launch", - "type": "cppdbg", - "request": "launch", - "program": "${workspaceFolder}/example/hello_world", - "args": [], - "stopAtEntry": true, - "cwd": "${workspaceFolder}", - "environment": [], - "externalConsole": false, - "MIMode": "gdb", - "setupCommands": [ - { - "description": "Enable pretty-printing for gdb", - "text": "-enable-pretty-printing", - "ignoreFailures": true - } - ] - }, - { - "name": "(gdb) cifar compiled example Launch", - "type": "cppdbg", - "request": "launch", - "program": "${workspaceFolder}/examples/cifar10_compiled", - "args": [], - "stopAtEntry": true, - "cwd": "${workspaceFolder}", - "environment": [], - "externalConsole": false, - "MIMode": "gdb", - "setupCommands": [ - { - "description": "Enable pretty-printing for gdb", - "text": "-enable-pretty-printing", - "ignoreFailures": true - } - ] - }, - { - "name": "(gdb) cifar interpreter example Launch", - "type": "cppdbg", - "request": "launch", - "program": "${workspaceFolder}/examples/cifar10", - "args": [], - "stopAtEntry": true, - "cwd": "${workspaceFolder}", - "environment": [], - "externalConsole": false, - "MIMode": "gdb", - "setupCommands": [ - { - "description": "Enable pretty-printing for gdb", - "text": "-enable-pretty-printing", - "ignoreFailures": true - } - ] - } - ] -} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json deleted file mode 100644 index 313b24c..0000000 --- a/.vscode/tasks.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - // See https://go.microsoft.com/fwlink/?LinkId=733558 - // for the documentation about the tasks.json format - "version": "2.0.0", - "tasks": [ - { - "label": "make", - "type": "shell", - "command": "make", - // use options.cwd property if the Makefile is not in the project root ${workspaceRoot} dir - "options": { - "cwd": "${workspaceRoot}" - }, - // start the build without prompting for task selection, use "group": "build" otherwise - "group": { - "kind": "build", - "isDefault": true - }, - "presentation": { - "echo": true, - "reveal": "always", - "focus": false, - "panel": "shared" - }, - // arg passing example: in this case is executed make QUIET=0 - "args": ["QUIET=0"], - // Use the standard less compilation problem matcher. 
- "problemMatcher": { - "base": "$gcc", - "fileLocation": [ "relative", "${workspaceRoot}" ] - } - } - ] -} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 21c8fb8..8b93a33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,18 +8,23 @@ IF(NOT TF_DIR) SET(TF_DIR "../tensorflow" CACHE STRING "TensorFlow source directory") ENDIF() -GET_FILENAME_COMPONENT(tf_fullpath ${TF_DIR} REALPATH) +GET_FILENAME_COMPONENT(TF_ABSPATH ${TF_DIR} REALPATH) IF(NOT GET_TF_SRC) - if(EXISTS "${tf_fullpath}") - SET(TFL_SRC ${TF_DIR}/tensorflow/lite) + if(EXISTS "${TF_ABSPATH}") + SET(TFL_SRC ${TF_ABSPATH}/tensorflow/lite) SET(TFLM_SRC ${TFL_SRC}/micro) SET(TFLMD_SRC ${TFLM_SRC}/tools/make/downloads) SET(TF_INCS - ${TF_DIR} - ${TFLMD_SRC}/flatbuffers/include - ${TFLMD_SRC}/ruy + ${TF_ABSPATH} + ${TFLMD_SRC}/flatbuffers/include + ${TFLMD_SRC}/ruy ) + IF(WIN32) + SET(TF_LIB ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib/libtensorflow-microlite.a) + ELSE() + SET(TF_LIB ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib/libtensorflow-microlite.a) + ENDIF() ELSE() MESSAGE(FATAL_ERROR "\ No valid TensorFlow source directory provided, default path \ @@ -32,10 +37,9 @@ ELSE() SET(TF_INCS ${TFLite_INCLUDE_DIRS} ) + SET(TF_LIB tensorflow-microlite) ENDIF() -SET(TF_LIB tensorflow-microlite) - SET(COMPILER_HEADERS ${PROJECT_SOURCE_DIR}/src/CodeWriter.h ${PROJECT_SOURCE_DIR}/src/Compiler.h @@ -43,6 +47,8 @@ SET(COMPILER_HEADERS ${PROJECT_SOURCE_DIR}/src/MemMap.h ${PROJECT_SOURCE_DIR}/src/RecordAllocations.h ${PROJECT_SOURCE_DIR}/src/TypeToString.h + ${PROJECT_SOURCE_DIR}/src/BuiltinAllocations.h + ${PROJECT_SOURCE_DIR}/src/ModelInfo.h ) SET(COMPILER_SRCS @@ -52,6 +58,7 @@ SET(COMPILER_SRCS ${PROJECT_SOURCE_DIR}/src/MemMap.cc ${PROJECT_SOURCE_DIR}/src/RecordAllocations.cc ${PROJECT_SOURCE_DIR}/src/TypeToString.cc + ${PROJECT_SOURCE_DIR}/src/BuiltinAllocations.cc ${PROJECT_SOURCE_DIR}/src/main.cc ) @@ -63,12 +70,6 @@ TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TF_INCS} ) -IF(WIN32) - TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib) -ELSE() - TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib) -ENDIF() - TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC ${TF_LIB}) TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC diff --git a/Makefile b/Makefile index fcb7d5b..721cb02 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,22 @@ TF_DIR=../tensorflow include common.mk -.PHONY: tflite all +.PHONY: tflite all -all: compiler examples +all: compiler$(EXE_SUFFIX) examples + +$(TF_MICROLITE_LIB): tflite tflite: $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/Makefile microlite -COMPILER_OBJS = src/main.o src/Compiler.o src/CodeWriter.o src/TypeToString.o src/RecordAllocations.o src/MemMap.o src/CustomOperators.o +COMPILER_OBJS = src/main.o src/Compiler.o src/CodeWriter.o src/TypeToString.o src/RecordAllocations.o src/MemMap.o src/CustomOperators.o src/BuiltinAllocations.o -compiler: $(COMPILER_OBJS) tflite - $(CXX) $(LDOPTS) -o $@ $(COMPILER_OBJS) $(LIBS) +compiler$(EXE_SUFFIX): $(COMPILER_OBJS) $(TF_MICROLITE_LIB) + $(CXX) $(CXXFLAGS) $(LDOPTS) -o $@ $(COMPILER_OBJS) $(LIBS) clean: clean-compiler clean-examples - $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/makefile clean + $(MAKE) -C $(TF_DIR) -f tensorflow/lite/micro/tools/make/Makefile clean FORMAT_FILES := $(shell find src -regex '.*\(h\|cpp\)') @@ -22,12 +24,19 @@ format: clang-format -i $(FORMAT_FILES) .PHONY: examples clean-examples clean-compiler 
-examples: - cd examples && $(MAKE) + +examples: tflite + $(MAKE) -C examples all + +run_examples: tflite + $(MAKE) -C examples run_all + +regenerate: compiler$(EXE_SUFFIX) + $(MAKE) -C examples regenerate clean-examples: $(MAKE) -C examples clean clean-compiler: - $(RM) src/*.o compiler + $(RM) src/*.o compiler$(EXE_SUFFIX) diff --git a/README.md b/README.md index 5908636..74d3aee 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ CMake using the option `TF_TAG`. e.g. ``` bash -cmake -DGET_TF_SRC=ON TF_TAG=v2.2.0 .. +cmake -DGET_TF_SRC=ON -DTF_TAG=v2.2.0 .. ``` Similarly a Git commit hash can be provided using `TF_COMMIT`. Note that @@ -57,7 +57,7 @@ Similarly a Git commit hash can be provided using `TF_COMMIT`. Note that e.g. ```bash -cmake -DGET_TF_SRC=ON TF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. +cmake -DGET_TF_SRC=ON -DTF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. ``` To checkout a different TensorFlow code base without clearing the CMake cache @@ -67,7 +67,7 @@ source to be checked-out again. e.g. ```bash -cmake -DGET_TF_SRC=ON -DTF_RECACHE=ON TF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. +cmake -DGET_TF_SRC=ON -DTF_RECACHE=ON -DTF_COMMIT=0fecf6f89fd7bacc1ec4213b946a254e885b82ac .. ``` ## Providing TensorFlow Source Manually @@ -79,7 +79,7 @@ providing the argument `TF_DIR`. e.g. ``` bash -cmake -DTF_DIR=../../my_tf_source .. +cmake -DTF_DIR=../my_tensorflow .. ``` ## Additional Targets @@ -117,7 +117,7 @@ make format ./compiler hello_world.tflite hello_compiled.cpp hello_ ``` -- for a quick view into the generated code see [`compiled_hello.cpp`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/compiled_hello.cpp) +- for a quick view into the generated code see [`compiled_hello_world.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/compiled_hello_world.cc) You can compare calling into interpreter and compiled code between [`hello_world.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/hello_world.cc) and [`hello_world2.cc`](https://github.com/cpetig/tflite_micro_compiler/blob/master/examples/hello_world2.cc) diff --git a/common.mk b/common.mk index fe89325..1ff3e61 100644 --- a/common.mk +++ b/common.mk @@ -1,17 +1,23 @@ -CXXFLAGS=-g -std=c++14 -DTF_LITE_STATIC_MEMORY -DNDEBUG -O3 -DTF_LITE_DISABLE_X86_NEON -DSUFFICIENT_ARENA_SIZE=128\*1024\*1024 \ +CXXFLAGS=-g -std=c++14 -DTF_LITE_STATIC_MEMORY -DDEBUG -O1 -DTF_LITE_DISABLE_X86_NEON -DSUFFICIENT_ARENA_SIZE=128\*1024\*1024 \ -I$(TF_DIR) -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/ \ -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/gemmlowp \ -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include \ -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/ruy \ - -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/kissfft - -LDOPTS=-L $(TF_DIR)/tensorflow/lite/micro/tools/make/gen/$(HOST_OS_BUILD)/lib + -I$(TF_DIR)/tensorflow/lite/micro/tools/make/downloads/kissfft +ifeq ($(BUILD_TYPE),debug) + HOST_OS_BUILD:=$(HOST_OS_BUILD)_debug +endif +TF_MICROLITE_LIBDIR=$(TF_DIR)/tensorflow/lite/micro/tools/make/gen/$(HOST_OS_BUILD)/lib +TF_MICROLITE_LIB=$(TF_MICROLITE_LIBDIR)/libtensorflow-microlite.a ifeq ($(OS),Windows_NT) - LIBS=-ltensorflow-microlite - HOST_OS_BUILD=windows_x86_64 + LIBS=$(TF_MICROLITE_LIB) + HOST_OS_BUILD := windows_x86_64 + EXE_SUFFIX := .exe else - LIBS=-ltensorflow-microlite -ldl - HOST_OS_BUILD=linux_x86_64 + LIBS=$(TF_MICROLITE_LIB) -ldl + HOST_OS_BUILD := linux_x86_64 + 
EXE_SUFFIX := endif + diff --git a/src/BuiltinAllocations.cc b/src/BuiltinAllocations.cc new file mode 100644 index 0000000..4f9d03d --- /dev/null +++ b/src/BuiltinAllocations.cc @@ -0,0 +1,136 @@ +#include "BuiltinAllocations.h" + +#include +#include + +#include "TypeToString.h" +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/core/api/error_reporter.h" + +namespace { + +class AllocatorToGetLastAllocSize : public tflite::BuiltinDataAllocator { + public: + + void* Allocate(size_t size, size_t alignment_hint) override { + lastAllocSize = size; + allocated_blocks.push_back(std::make_unique(size)); + return reinterpret_cast(allocated_blocks.back().get()); + } + + void Deallocate(void* data) override { + } + + + size_t GetLastAllocSize() { return lastAllocSize; } + + private: + std::vector> allocated_blocks; + size_t lastAllocSize = 0; +}; + +} // namespace + +namespace tflmc { +namespace BuiltinAllocations { + +size_t GetBuiltinDataSize(tflite::BuiltinOperator opType, + const tflite::SubGraph* subgraph, + tflite::ErrorReporter &errReporter) { + // There seems to be no simple query function for this, so tickle the + // information out of the parse function. + auto dummyOp = subgraph->operators()->Get(0); + AllocatorToGetLastAllocSize allocator; + void* outData = nullptr; + if (tflite::ParseOpData(dummyOp, opType, &errReporter, &allocator, + &outData) != kTfLiteOk) { + throw std::runtime_error("ERROR: Unable to use tflite::ParseOpData to extract the BuiltinDataSize!\n" + "tensorflow/lite/core/api/flatbuffer_conversions.cc needs a patch to support this feature..."); + } + + return allocator.GetLastAllocSize(); +} + +std::pair getBuiltinStrings(tflite::BuiltinOperator op, + const void* data) { + using namespace tflmc; + std::stringstream builtinOptionsName, builtinOptionsStruct; + switch (op) { + case tflite::BuiltinOperator_CONV_2D: { + builtinOptionsName << "TfLiteConvParams"; + TfLiteConvParams const* p = (TfLiteConvParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->padding) << ", " << p->stride_width << "," + << p->stride_height << ", " << to_string(p->activation) << ", " + << p->dilation_width_factor << "," << p->dilation_height_factor + << " }"; + } break; + case tflite::BuiltinOperator_DEPTHWISE_CONV_2D: { + builtinOptionsName << "TfLiteDepthwiseConvParams"; + TfLiteDepthwiseConvParams const* p = + (TfLiteDepthwiseConvParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->padding) << ", " << p->stride_width << "," + << p->stride_height << ", " << p->depth_multiplier << ", " + << to_string(p->activation) << ", " << p->dilation_width_factor + << "," << p->dilation_height_factor << " }"; + } break; + case tflite::BuiltinOperator_FULLY_CONNECTED: { + builtinOptionsName << "TfLiteFullyConnectedParams"; + TfLiteFullyConnectedParams const* p = + (TfLiteFullyConnectedParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->activation) << ", " << to_string(p->weights_format) + << ", " << p->keep_num_dims << ", " << p->asymmetric_quantize_inputs + << " }"; + } break; + case tflite::BuiltinOperator_MAX_POOL_2D: + case tflite::BuiltinOperator_AVERAGE_POOL_2D: { + builtinOptionsName << "TfLitePoolParams"; + TfLitePoolParams const* p = (TfLitePoolParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->padding) << ", " << p->stride_width << "," + << p->stride_height << ", " << p->filter_width << "," + << p->filter_height << ", " << to_string(p->activation) << ", { " + << to_string(p->computed.padding) << " } }"; + } break; + case 
tflite::BuiltinOperator_RESHAPE: { + builtinOptionsName << "TfLiteReshapeParams"; + builtinOptionsStruct << "{ {"; + TfLiteReshapeParams const* p = (TfLiteReshapeParams const*)data; + for (uint32_t i = 0; i < TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT; ++i) + builtinOptionsStruct << p->shape[i] << ", "; + builtinOptionsStruct << "}, " << p->num_dimensions << " }"; + } break; + case tflite::BuiltinOperator_SOFTMAX: { + builtinOptionsName << "TfLiteSoftmaxParams"; + TfLiteSoftmaxParams const* p = (TfLiteSoftmaxParams const*)data; + builtinOptionsStruct << "{ " << p->beta << " }"; + } break; + case tflite::BuiltinOperator_ADD: { + builtinOptionsName << "TfLiteAddParams"; + TfLiteAddParams const* p = (TfLiteAddParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->activation) << ", " + << p->pot_scale_int16 << " }"; + } break; + case tflite::BuiltinOperator_MUL: { + builtinOptionsName << "TfLiteMulParams"; + TfLiteMulParams const* p = (TfLiteMulParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->activation) << " }"; + } break; + case tflite::BuiltinOperator_SUB: { + builtinOptionsName << "TfLiteSubParams"; + TfLiteSubParams const* p = (TfLiteSubParams const*)data; + builtinOptionsStruct << "{ " << to_string(p->activation) << ", " + << p->pot_scale_int16 << " }"; + } break; + case tflite::BuiltinOperator_CONCATENATION: { + builtinOptionsName << "TfLiteConcatenationParams"; + TfLiteConcatenationParams const* p = + (TfLiteConcatenationParams const*)data; + builtinOptionsStruct << "{ " << p->axis << ", " << to_string(p->activation) << " }"; + } break; + default: { + } break; + } + return std::make_pair(builtinOptionsName.str(), builtinOptionsStruct.str()); +} + +} // namespace BuiltinAllocations +} // namespace tflmc diff --git a/src/BuiltinAllocations.h b/src/BuiltinAllocations.h new file mode 100644 index 0000000..73bcb62 --- /dev/null +++ b/src/BuiltinAllocations.h @@ -0,0 +1,20 @@ +#ifndef TFLMCOMPILER_BUILTIN_ALLOCATIONS_H +#define TFLMCOMPILER_BUILTIN_ALLOCATIONS_H + +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/core/api/error_reporter.h" + +namespace tflmc { +namespace BuiltinAllocations { + +size_t GetBuiltinDataSize(tflite::BuiltinOperator opType, + const tflite::SubGraph* subgraph, + tflite::ErrorReporter &errReporter); + +std::pair getBuiltinStrings(tflite::BuiltinOperator op, + const void* data); + +} // namespace BuiltinAllocations +} // namespace tflmc + +#endif diff --git a/src/CodeWriter.cc b/src/CodeWriter.cc index 564c0b1..39c4ff3 100644 --- a/src/CodeWriter.cc +++ b/src/CodeWriter.cc @@ -3,43 +3,20 @@ #include #include -#include "TypeToString.h" +#include "BuiltinAllocations.h" #include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace { - -class AllocatorToGetLastAllocSize : public tflite::BuiltinDataAllocator { - public: - void* Allocate(size_t size, size_t alignment_hint) override { - lastAllocSize = size; - return malloc(size); - } - void Deallocate(void* data) override { free(data); } - size_t GetLastAllocSize() { return lastAllocSize; } - - private: - size_t lastAllocSize = 0; -}; -size_t GetBuiltinDataSize(tflite::BuiltinOperator opType, - const tflite::SubGraph* subgraph) { - // There seems to be no simple query function for this, so tickle the - // information out of the parse function. 
- auto dummyOp = subgraph->operators()->Get(0); - tflite::MicroErrorReporter errReporter; - AllocatorToGetLastAllocSize allocator; - void* outData = nullptr; - if (tflite::ParseOpData(dummyOp, opType, &errReporter, &allocator, - &outData) == kTfLiteOk) - free(outData); - return allocator.GetLastAllocSize(); -} - -} // namespace +#include "tensorflow/lite/core/api/error_reporter.h" tflmc::CodeWriter::CodeWriter(std::ostream& out, - const tflite::SubGraph* subgraph) - : out_(out), subgraph_(subgraph) { + const tflite::SubGraph* subgraph, + tflite::ErrorReporter &err_reporter + ) + : out_(out), subgraph_(subgraph) + , err_reporter_(err_reporter) + , init_data_usage_(0) + , uninit_data_usage_(0) + , const_data_usage_(0) +{ // Setup stream: Print booleans as string: out_ << std::boolalpha; // Print floats with precision that is sufficient for exact back-conversion: @@ -62,97 +39,58 @@ void tflmc::CodeWriter::writeBuiltin(tflite::BuiltinOperator op, return; } out_ << "const "; - switch (op) { - case tflite::BuiltinOperator_CONV_2D: { - out_ << "TfLiteConvParams " << name << " = { "; - TfLiteConvParams const* p = (TfLiteConvParams const*)data; - out_ << to_string(p->padding) << ", " << p->stride_width << "," - << p->stride_height << ", " << to_string(p->activation) << ", " - << p->dilation_width_factor << "," << p->dilation_height_factor - << " };"; - } break; - case tflite::BuiltinOperator_DEPTHWISE_CONV_2D: { - out_ << "TfLiteDepthwiseConvParams " << name << " = { "; - TfLiteDepthwiseConvParams const* p = - (TfLiteDepthwiseConvParams const*)data; - out_ << to_string(p->padding) << ", " << p->stride_width << "," - << p->stride_height << ", " << p->depth_multiplier << ", " - << to_string(p->activation) << ", " << p->dilation_width_factor - << "," << p->dilation_height_factor << " };"; - } break; - case tflite::BuiltinOperator_FULLY_CONNECTED: { - out_ << "TfLiteFullyConnectedParams " << name << " = { "; - TfLiteFullyConnectedParams const* p = - (TfLiteFullyConnectedParams const*)data; - out_ << to_string(p->activation) << ", " << to_string(p->weights_format) - << ", " << p->keep_num_dims << ", " << p->asymmetric_quantize_inputs - << " };"; - } break; - case tflite::BuiltinOperator_MAX_POOL_2D: - case tflite::BuiltinOperator_AVERAGE_POOL_2D: { - out_ << "TfLitePoolParams " << name << " = { "; - TfLitePoolParams const* p = (TfLitePoolParams const*)data; - out_ << to_string(p->padding) << ", " << p->stride_width << "," - << p->stride_height << ", " << p->filter_width << "," - << p->filter_height << ", " << to_string(p->activation) << ", { " - << to_string(p->computed.padding) << " } };"; - } break; - case tflite::BuiltinOperator_RESHAPE: { - out_ << "TfLiteReshapeParams " << name << " = { { "; - TfLiteReshapeParams const* p = (TfLiteReshapeParams const*)data; - for (uint32_t i = 0; i < TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT; ++i) - out_ << p->shape[i] << ", "; - out_ << "}, " << p->num_dimensions << " };"; - } break; - case tflite::BuiltinOperator_SOFTMAX: { - out_ << "TfLiteSoftmaxParams " << name << " = { "; - TfLiteSoftmaxParams const* p = (TfLiteSoftmaxParams const*)data; - out_ << p->beta << " };"; - } break; - case tflite::BuiltinOperator_ADD: { - out_ << "TfLiteAddParams " << name << " = { "; - TfLiteAddParams const* p = (TfLiteAddParams const*)data; - out_ << to_string(p->activation) << " };"; - } break; - case tflite::BuiltinOperator_MUL: { - out_ << "TfLiteMulParams " << name << " = { "; - TfLiteMulParams const* p = (TfLiteMulParams const*)data; - out_ << to_string(p->activation) << " 
};"; - } break; - case tflite::BuiltinOperator_SUB: { - out_ << "TfLiteSubParams " << name << " = { "; - TfLiteSubParams const* p = (TfLiteSubParams const*)data; - out_ << to_string(p->activation) << " };"; - } break; - case tflite::BuiltinOperator_CONCATENATION: { - out_ << "TfLiteConcatenationParams " << name << " = { "; - TfLiteConcatenationParams const* p = - (TfLiteConcatenationParams const*)data; - out_ << p->axis << ", " << to_string(p->activation) << " };"; - } break; - default: { - size_t datalen = GetBuiltinDataSize(op, subgraph_); - uint32_t alignment = datalen >= 4 ? 4 : datalen >= 2 ? 2 : 1; - out_ << "ALIGN(" << alignment << ") uint8_t " << name << "[" << datalen - << "] = { "; - for (uint32_t i = 0; i < datalen; ++i) - out_ << int(((uint8_t const*)data)[i]) << ", "; - out_ << " }; /* op type " << int(op) << "=" - << tflite::EnumNameBuiltinOperator(op) << " */"; - } break; + auto builtin_strings = BuiltinAllocations::getBuiltinStrings(op, data); + if (!builtin_strings.first.empty() && !builtin_strings.first.empty()) { + out_ << builtin_strings.first << " " << name << " = " + << builtin_strings.second << ";"; + } else { + size_t datalen = BuiltinAllocations::GetBuiltinDataSize(op, subgraph_, err_reporter_); + uint32_t alignment = datalen >= 4 ? 4 : datalen >= 2 ? 2 : 1; + out_ << "ALIGN(" << alignment << ") uint8_t " << name << "[" << datalen + << "] = { "; + for (uint32_t i = 0; i < datalen; ++i) + out_ << int(((uint8_t const*)data)[i]) << ", "; + out_ << " }; /* op type " << int(op) << "=" + << tflite::EnumNameBuiltinOperator(op) << " */"; } out_ << '\n'; } + + +void tflmc::CodeWriter::writeCustom(uint8_t const *opdata, size_t node_i, size_t opdata_size) { + out_ << "uint8_t ALIGN(4) opdata" + std::to_string(node_i) << "[" + << opdata_size << "] = { "; + for (size_t j = 0; j < opdata_size; ++j) + out_ << int(opdata[j]) << ", "; + out_ << " }; /* custom_initial_data */\n"; + const_data_usage_ += opdata_size; + init_data_usage_ += opdata_size; +} + +template +size_t writeTfArray( std::ostream &os, const TFArray *tfarray, const std::string &name, const char * suffix, const char *data_type_id) +{ + os << "const TfArray<" + << tfarray->size << ", " + << data_type_id << "> " + << name << suffix + << " = { " << tfarray->size << ", { "; + for (int i = 0; i < tfarray->size; i++) { + os << tfarray->data[i] << ", "; + } + os << "} };\n"; + return tfarray->size+1; +} + void tflmc::CodeWriter::writeIntArray(const TfLiteIntArray& arr, const std::string& name) { if (arr.size == 0) { out_ << "const int " << name << " = 0; /* empty TfLiteIntArray */\n"; + const_data_usage_ += sizeof(int); } else { - out_ << "const TfArray<" << arr.size << ", int> " << name << " = { " - << arr.size << ", { "; - writeIntArrayData(arr); - out_ << " } };\n"; + auto arr_size = writeTfArray(out_, &arr, name, "", "int"); + const_data_usage_ += sizeof(int)*arr_size; } } @@ -168,14 +106,16 @@ void tflmc::CodeWriter::writeIntArrayData(const TfLiteIntArray& arr) { // outputting int8_t as a character is not what we intend here, we want to see // the value, so we introduce printT template -static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, +static size_t dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, const std::string& tname, const std::string& name) { + + size_t mem_size; if (t.dims->size == 0) { // special case 0 dimensions, we output an array to // avoid distinction from >0 dimension at every use out_ << "const " << tname << " " << name << "[1] = { " << 
(printT)(tflite::GetTensorData(&t)[0]) << " };\n"; - return; + return sizeof(T); } uint32_t alignment = t.bytes >= 8 ? 8 : t.bytes >= 4 ? 4 : 2; @@ -194,6 +134,7 @@ static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, if (serialized_elts != nominal_elts) { out_ << serialized_elts << " /* PACKED "; + } out_ << t.dims->data[0]; @@ -202,13 +143,15 @@ static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, out_ << " */"; } out_ << "] = { "; - if (t.dims->size == 1 || serialized_elts != nominal_elts) { - // one dimension/packed: 10 per line of data - for (int i = 0; i < serialized_elts; ++i) { - if (i % 10 == 0) out_ << "\n "; - out_ << (printT)(tflite::GetTensorData(&t)[i]) << ", "; + if (t.dims->size == 1 || serialized_elts != nominal_elts) // one dimension/packed: 10 per line of data + { + for (size_t i = 0; i < serialized_elts; ++i) { + if (i%10 == 0) + out_ << "\n "; + out_ << (printT)(tflite::GetTensorData(&t)[i]) << ", "; } out_ << "\n};\n"; + mem_size = serialized_elts*sizeof(T); } else if (t.dims->size == 2) { // two dimensions: Inner dimension is one line for (int i = 0; i < t.dims->data[0]; ++i) { @@ -218,6 +161,7 @@ static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, << ", "; } out_ << "\n};\n"; + mem_size = nominal_elts*sizeof(T); } else { // More dimensions: Inner two dimensions per line (space between two // middle elements) @@ -249,12 +193,14 @@ static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t, } } out_ << "\n};\n"; + mem_size = nominal_elts*sizeof(T); } + return mem_size; } #define DUMP_TENSOR2(TfType, CType, PrintType) \ case TfType: \ - dump_tensor_contents(out_, t, #CType, name); \ + const_data_usage_ += dump_tensor_contents(out_, t, #CType, name); \ break void tflmc::CodeWriter::writeTensor(const TfLiteTensor& t, @@ -276,42 +222,72 @@ void tflmc::CodeWriter::writeTensor(const TfLiteTensor& t, for (size_t i = 0; i < t.bytes; i++) out_ << int((uint8_t)t.data.raw_const[i]) << ","; out_ << " };\n"; + const_data_usage_ += t.bytes; } break; } } + +static void writeAffineQuantizationFields(std::ostream &out, const std::string& name, TfLiteAffineQuantization const *aq) { + + out << "{ " + << "(TfLiteFloatArray*)&" << name << "_scale, " + << "(TfLiteIntArray*)&" << name << "_zero, " << aq->quantized_dimension + << " }"; +} + + +#if SUPPORT_CUSTOM_QUANT +static void writeQuantizationDetails( + std::ostream& out, const TfLiteCustomSub8BitPackingDetails* sub8_details, + const std::string& name) { + out << "const TfLiteCustomSub8BitPackingDetails " << name << " = { "; + out << static_cast(sub8_details->bits_per_item) << ", "; + out << static_cast(sub8_details->container_bits) << ", "; + out << static_cast(sub8_details->packed_minor_dims) << ", "; + out << static_cast(sub8_details->sparsity_coding) << ", "; + out << "{}"; + out << "};\n"; +} +#endif // SUPPORT_CUSTOM_QUANT + void tflmc::CodeWriter::writeQuantization(const TfLiteQuantization& q, const std::string& name) { if (q.type == kTfLiteAffineQuantization) { auto aq = (TfLiteAffineQuantization const*)q.params; - out_ << "const TfArray<" << aq->scale->size << ", float> " << name - << "_scale = { " << aq->scale->size << ", { "; - for (int i = 0; i < aq->scale->size; i++) { - out_ << aq->scale->data[i] << ", "; - } - out_ << "} };\n"; - out_ << "const TfArray<" << aq->zero_point->size << ", int> " << name - << "_zero = { " << aq->zero_point->size << ", { "; - writeIntArrayData(*aq->zero_point); - out_ << " } };\n"; - out_ << "const 
TfLiteAffineQuantization " << name << " = { " - << "(TfLiteFloatArray*)&" << name << "_scale, " - << "(TfLiteIntArray*)&" << name << "_zero, " << aq->quantized_dimension - << " };\n"; + auto scale_size = writeTfArray(out_, aq->scale, name, "_scale", "float"); + auto zp_size = writeTfArray(out_, aq->zero_point, name, "_zero", "int"); + const_data_usage_ += scale_size * sizeof(float) + zp_size*sizeof(int); + out_ << "const TfLiteAffineQuantization " << name << " = "; + writeAffineQuantizationFields(out_, name, aq); + out_ << ";\n"; + const_data_usage_ += sizeof(TfLiteAffineQuantization); +#if SUPPORT_CUSTOM_QUANT + } else if (q.type == kTfLitePackedAffineQuantization) { + auto paq = (TfLitePackedAffineQuantization const*)q.params; + writeQuantizationDetails(out_, paq->custom_sub8bit_packing, name + "_packing"); + const_data_usage_ += sizeof(TfLiteCustomSub8BitPackingDetails); + auto aq = &paq->affine; + auto scale_size = writeTfArray(out_, aq->scale, name, "_scale", "float"); + auto zp_size = writeTfArray(out_, aq->zero_point, name, "_zero", "int"); + const_data_usage_ += scale_size * sizeof(float) + zp_size*sizeof(int); + out_ << "const TfLitePackedAffineQuantization " << name << " = { "; + writeAffineQuantizationFields(out_, name, aq); + out_ << ", &" << name + "_packing" << "};\n"; + const_data_usage_ += sizeof(kTfLitePackedAffineQuantization); +#endif // SUPPORT_CUSTOM_QUANT } } -#if TF_LITE_PACKED_QUANTIZED_DATA_VERSION == 100 -void tflmc::CodeWriter::writeQuantizationDetails(const TfLiteQuantization& q, - const std::string& name) { - if (q.details.type == kTfLiteSub8BitPackedUniformDetail) { - out_ << "const TfLiteCustomSub8BitPackingDetails " << name << " = { "; - auto sub8_details = q.details.data.custom_sub8bit_packing; - out_ << static_cast(sub8_details->bits_per_item) << ", "; - out_ << static_cast(sub8_details->container_bits) << ", "; - out_ << static_cast(sub8_details->packed_minor_dims) << ", "; - out_ << "{}"; - out_ << "};\n"; - } +void tflmc::CodeWriter::writeTensorArena(size_t tensor_arena_size) +{ + out_ << R"( +constexpr int kTensorArenaSize = )" + << tensor_arena_size << R"(; +uint8_t tensor_arena[kTensorArenaSize] ALIGN(16); +)"; + uninit_data_usage_ += tensor_arena_size; } -#endif + + + \ No newline at end of file diff --git a/src/CodeWriter.h b/src/CodeWriter.h index e8c69b7..f08a22d 100644 --- a/src/CodeWriter.h +++ b/src/CodeWriter.h @@ -2,18 +2,25 @@ #define TFLMCOMPILER_CODEWRITER_H #include - #include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/core/api/error_reporter.h" +#include "tensorflow/lite/version.h" namespace tflmc { // Helper functions for top-level code generation. class CodeWriter { public: - CodeWriter(std::ostream &out, const tflite::SubGraph *subgraph); + CodeWriter(std::ostream &out, const tflite::SubGraph *subgraph, + tflite::ErrorReporter &errReporter); void writeBuiltin(tflite::BuiltinOperator op, const void *data, const std::string &name); + + void writeCustom(uint8_t const *opdata, size_t node_i, size_t opdata_size); + + std::pair getBuiltinStrings(tflite::BuiltinOperator op, + const void* data); // Write IntArray with variable declaration. 
void writeIntArray(const TfLiteIntArray &arr, const std::string &name); @@ -24,10 +31,41 @@ class CodeWriter { void writeQuantization(const TfLiteQuantization &q, const std::string &name); -#if TF_LITE_PACKED_QUANTIZED_DATA_VERSION == 100 - void writeQuantizationDetails(const TfLiteQuantization &q, - const std::string &name); -#endif + void writeTensorArena(size_t tensor_arena_size); + +template +void writeArray(const Container &container, size_t elt_size, bool is_const, + const char *decl, const char *name ) { + + out_ << decl << ' ' << name << R"([] = { +)"; + size_t elts = 0; + for (auto &e : container) { + out_ << std::to_string(e) << ","; + ++elts; + if (elts % 10 == 0) { + out_ << "\n"; + } else { + out_ << " "; + } + } + // To suppress warnings add dummy element if no scratch bufs + if (container.empty()) { + out_ << "0 // dummy to avoid empty vector"; + } + out_ << R"( +}; +)"; + + size_t footprint = elt_size * container.size(); + if (is_const) { + const_data_usage_ += footprint; + } else { + init_data_usage_ += footprint; + } +} + + template CodeWriter &operator<<(T &&value) { @@ -35,9 +73,20 @@ class CodeWriter { return *this; } + inline size_t initDataUsage() const { return init_data_usage_; } + + inline size_t uninitDataUsage() const { return uninit_data_usage_; } + + inline size_t constDataUsage() const { return const_data_usage_; } + private: std::ostream &out_; const tflite::SubGraph *subgraph_ = nullptr; + tflite::ErrorReporter &err_reporter_; + + size_t init_data_usage_; + size_t uninit_data_usage_; + size_t const_data_usage_; }; } // namespace tflmc diff --git a/src/Compiler.cc b/src/Compiler.cc index 53fe3e0..82a957a 100644 --- a/src/Compiler.cc +++ b/src/Compiler.cc @@ -1,34 +1,217 @@ #include "Compiler.h" - #include #include +#include #include #include +#include #include "CodeWriter.h" #include "CustomOperators.h" #include "RecordAllocations.h" +#include "Options.h" #include "TypeToString.h" -#include "tensorflow/lite/version.h" +#include "tensorflow/lite/c/common.h" -#ifndef SUFFICIENT_ARENA_SIZE -#define SUFFICIENT_ARENA_SIZE (128*1024*1024) + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA +#include "tflite_u_preint/static_init_support.h" #endif -#if TF_LITE_PACKED_QUANTIZED_DATA_VERSION -#if TF_LITE_PACKED_QUANTIZED_DATA_VERSION != 100 -#error "ONLY TF_LITE_PACKED_QUANTIZED_DATA_VERSION Version 100 supported!" 
+#ifndef SUFFICIENT_ARENA_SIZE +#define SUFFICIENT_ARENA_SIZE (128*1024*1024) #endif + +#ifndef SUFFICIENT_ARENA_ALIGNMENT +#define SUFFICIENT_ARENA_ALIGNMENT (16) #endif -bool tflmc::CompileFile(const std::string &modelFileName, - const std::string &outFileName, +const static int ILLEGAL_IF_EVER_MULTIPLE_SUBGRAPH = 0xdeadbeef; + + + +namespace tflmc +{ + + /** + * @brief Generation of specialized TensorInfo_t POD struct + * + */ + struct GeneratedTensorInfo { + + struct Full_t{ + TfLiteType type; + void* data; + TfLiteIntArray* dims; + size_t bytes; + TfLiteQuantization quantization; + bool is_variable; + }; + + static std::string generated(bool has_type, bool has_quantization, bool has_is_variable) { + + std::stringstream wr; + + wr << R"( +struct TensorInfo_t { // subset of TfLiteTensor used for initialization from constant memory +)"; + if (has_type) { + wr << " TfLiteType type;\n"; + } + wr << R"( void* data; + TfLiteIntArray* dims; + size_t bytes; +)"; + if (has_quantization) { + wr << " TfLiteQuantization quantization;\n"; + } + if (has_is_variable) { + wr << " bool is_variable;\n"; + } + wr << "};\n"; + return wr.str(); + } + + struct TrailingBoolField { + bool a_bool; + }; + + static size_t size(bool has_type, bool has_quantization, bool has_is_variable) { + auto size = sizeof(Full_t); + if (!has_type) { size -= sizeof(TfLiteType); } + if (!has_quantization) { size -= sizeof(TfLiteQuantization); } + // Dangling bool... prboably more accurate than simply sizeof(bool) + // once alignment / packing constraints are accounted for. + if (!has_is_variable) { size -= sizeof(TrailingBoolField); } + return size; + } + }; + + /** + * @brief Generation of specialized NodeInfo_t POD struct + * + */ + + struct GeneratedNodeInfo { + + enum used_operators_e { DUMMY_OP_INDEX, LAST_OP }; + + struct Full_t { + struct TfLiteIntArray* inputs; + struct TfLiteIntArray* outputs; + void* builtin_data; + used_operators_e used_op_index; + int custom_initial_data_size; + }; + + + static std::string generated(bool has_custom_ops) { + + std::stringstream wr; + wr << R"( +struct NodeInfo_t { // subset of TfLiteNode used for initialization from constant memory + struct TfLiteIntArray* inputs; + struct TfLiteIntArray* outputs; + void* builtin_data; + used_operators_e used_op_index; + )"; + if (has_custom_ops) { + wr << " int custom_initial_data_size;\n"; + } + wr << "};\n"; + return wr.str(); + } + + static size_t size(bool has_custom_ops) { + auto size = sizeof(Full_t); + if (!has_custom_ops) size -= sizeof(int); + return size; + } + }; +} // namespace tflmc + +static std::vector flat_namespaced_ops({ + tflite::BuiltinOperator_ADD, + tflite::BuiltinOperator_ADD_N, + tflite::BuiltinOperator_ASSIGN_VARIABLE, + tflite::BuiltinOperator_AVERAGE_POOL_2D, + tflite::BuiltinOperator_BATCH_TO_SPACE_ND, + tflite::BuiltinOperator_CALL_ONCE, + tflite::BuiltinOperator_CAST, + tflite::BuiltinOperator_CONV_2D, + tflite::BuiltinOperator_CUMSUM, + tflite::BuiltinOperator_DEPTH_TO_SPACE, + tflite::BuiltinOperator_DEPTHWISE_CONV_2D, + tflite::BuiltinOperator_DIV, + tflite::BuiltinOperator_ELU, + tflite::BuiltinOperator_EXP, + tflite::BuiltinOperator_EXPAND_DIMS, + tflite::BuiltinOperator_FILL, + tflite::BuiltinOperator_FLOOR_DIV, + tflite::BuiltinOperator_FLOOR_MOD, + tflite::BuiltinOperator_FULLY_CONNECTED, + tflite::BuiltinOperator_GATHER, + tflite::BuiltinOperator_GATHER_ND, + tflite::BuiltinOperator_HARD_SWISH, + tflite::BuiltinOperator_IF, + tflite::BuiltinOperator_L2_POOL_2D, + tflite::BuiltinOperator_LEAKY_RELU, + 
tflite::BuiltinOperator_LOG_SOFTMAX, + tflite::BuiltinOperator_LOGICAL_AND, + tflite::BuiltinOperator_LOGICAL_OR, + tflite::BuiltinOperator_LOGISTIC, + tflite::BuiltinOperator_MAX_POOL_2D, + tflite::BuiltinOperator_MIRROR_PAD, + tflite::BuiltinOperator_MUL, + tflite::BuiltinOperator_PRELU, + tflite::BuiltinOperator_QUANTIZE, + tflite::BuiltinOperator_READ_VARIABLE, + tflite::BuiltinOperator_RELU, + tflite::BuiltinOperator_RELU6, + tflite::BuiltinOperator_RESIZE_BILINEAR, + tflite::BuiltinOperator_SHAPE, + tflite::BuiltinOperator_SLICE, + tflite::BuiltinOperator_SOFTMAX, + tflite::BuiltinOperator_SPACE_TO_BATCH_ND, + tflite::BuiltinOperator_SPACE_TO_DEPTH, + tflite::BuiltinOperator_SQUEEZE, + tflite::BuiltinOperator_SUB, + tflite::BuiltinOperator_SVDF, + tflite::BuiltinOperator_TRANSPOSE, + tflite::BuiltinOperator_TRANSPOSE_CONV, + tflite::BuiltinOperator_VAR_HANDLE, + tflite::BuiltinOperator_ZEROS_LIKE + }) +; + + +static std::vector graph_dependent_ops({ + + tflite::BuiltinOperator_ASSIGN_VARIABLE, + tflite::BuiltinOperator_CALL_ONCE, + tflite::BuiltinOperator_IF, + tflite::BuiltinOperator_VAR_HANDLE, + tflite::BuiltinOperator_READ_VARIABLE, + }) +; + +int tflmc::Compiler::TrackingErrorReporter::Report(const char* format, va_list args) { + vfprintf(stderr, format, args); + error_reported_ = true; + return 0; +} + + + +bool tflmc::CompileFile(const std::string &modelPathName, + const std::string &outSrcPathName, + const std::string &outHdrPathName, const std::string &prefix) { // Load model flatbuffer. - std::ifstream model_file(modelFileName, std::ios::binary | std::ios::ate); + std::ifstream model_file(modelPathName, std::ios::binary | std::ios::ate); if (!model_file) { - std::cerr << "Could not open " << modelFileName << " for read\n"; + std::cerr << "Could not open " << modelPathName << " for read\n"; return false; } auto sz = model_file.tellg(); @@ -43,23 +226,26 @@ bool tflmc::CompileFile(const std::string &modelFileName, return false; } - std::ofstream outFile(outFileName); + + std::ofstream outFile(outSrcPathName); if (!outFile) { - std::cerr << "Failed to create output file\n"; + std::cerr << "Failed to create output source file: " << outSrcPathName << std::endl;; return false; } - std::ofstream outHeaderFile(outFileName + ".h"); + std::ofstream outHeaderFile(outHdrPathName); if (!outHeaderFile) { - std::cerr << "Failed to create output header file\n"; + std::cerr << "Failed to create output header file: " << outHdrPathName << std::endl; return false; } try { Compiler compiler(model_data.data(), prefix); + compiler.writeSource(outFile); compiler.writeHeader(outHeaderFile); - return true; + compiler.reportMemUsage(); + return compiler.noErrorsReported(); } catch (const std::exception &e) { std::cerr << e.what() << "\n"; } catch (...) 
{ @@ -70,7 +256,10 @@ bool tflmc::CompileFile(const std::string &modelFileName, } tflmc::Compiler::Compiler(const void *modelData, const std::string &prefix) - : prefix_(prefix) { + : prefix_(prefix) + , arena_(SUFFICIENT_ARENA_SIZE, SUFFICIENT_ARENA_ALIGNMENT) { + aligned_arena_start_ = arena_.alginedBufferStart(); + arena_size_ = SUFFICIENT_ARENA_SIZE; if (!init(modelData)) { throw std::runtime_error("Could not set up compiler"); } @@ -104,14 +293,24 @@ bool tflmc::Compiler::init(const void *modelData) { for (auto outIndex : *subgraph_->outputs()) { outputTensorIndices_.push_back(outIndex); } - tflmc::custom_operator_handle custom = tflmc::LoadCustom(&resolver_); + tflmc::custom_operator_handle custom = + tflmc::LoadCustom(static_cast(&resolver_)); // Build an interpreter to run the model with. - arena_buf_.resize(SUFFICIENT_ARENA_SIZE); + interpreter_ = std::unique_ptr( new tflite::MicroInterpreter( - model_, resolver_, arena_buf_.data(), arena_buf_.size(), - µErrReporter_)); + model_, resolver_, aligned_arena_start_, arena_size_, + &errReporter())); + + // Now know model size etc so we can initialize (tables) + // in tensor arena memory map. + arenaMap_.init(interpreter_->operators_size()); + +#if TFLMC_USE_INTERPRETER_HOOKS + // Activate hooks to record memory alliocations to fill _arenaMaop etc. + tflmc::SetRecordAllocationhooks( interpreter_.get(), aligned_arena_start_, arena_size_); +#endif // Allocate memory from the tensor_arena for the model's tensors. TfLiteStatus allocate_status = interpreter_->AllocateTensors(); @@ -121,48 +320,46 @@ bool tflmc::Compiler::init(const void *modelData) { } ptrdiff_t ramTensorBufferSize = 0; - ptrdiff_t romOffset = 0; auto numTensors = tensors->size(); if (numTensors > 0) { auto tensor = GetTensor(interpreter_.get(), 0); common_tensor_type = tensor->type; - common_tensor_is_variable = tensor->is_variable; } for (size_t i = 0; i < numTensors; i++) { auto tensor = GetTensor(interpreter_.get(), i); tensors_.push_back({tensor}); - if (tensor->allocation_type == kTfLiteMmapRo) { - memMap_.recordROM(romOffset, tensor->bytes, getTensorName(i)); - romOffset += tensor->bytes; - } else { - ptrdiff_t offset = (uint8_t *)tensor->data.data - arena_buf_.data(); + if (tensor->allocation_type != kTfLiteMmapRo) { + ptrdiff_t offset = (uint8_t *)tensor->data.data - aligned_arena_start_; ptrdiff_t highSize = offset + tensor->bytes; ramTensorBufferSize = std::max(ramTensorBufferSize, highSize); - memMap_.recordRAM(offset, tensor->bytes, getTensorName(i)); + arenaMap_.recordPersistent(offset, tensor->bytes, getTensorName(i)); } + // determine whether we need to individually set these properties for each // tensor - if ((!has_quantization) && - tensor->quantization.type != kTfLiteNoQuantization) { - has_quantization = true; - } + has_quantization |= ( tensor->quantization.type != kTfLiteNoQuantization); if ((!common_tensor_type.None) && common_tensor_type.Some != tensor->type) { common_tensor_type.clear(); } - if ((!common_tensor_is_variable.None) && - common_tensor_is_variable.Some != tensor->is_variable) { - common_tensor_is_variable.clear(); - } + has_is_variable |= tensor->is_variable; } + int unsupported_ops = 0; for (size_t i = 0; i < interpreter_->operators_size(); i++) { - auto nodeAndReg = interpreter_->node_and_registration(i); + auto nodeAndReg = interpreter_->node_and_registration(ILLEGAL_IF_EVER_MULTIPLE_SUBGRAPH,i); auto node = &nodeAndReg.node; auto reg = nodeAndReg.registration; auto code = tflite::EnumValuesBuiltinOperator()[reg->builtin_code]; - 
printf("operation %lu: %s\n", i, tflite::EnumNamesBuiltinOperator()[code]); - + std::cout << "operation " << i + << ": " << tflite::EnumNamesBuiltinOperator()[code]; + + if (std::find(graph_dependent_ops.begin(), graph_dependent_ops.end(), code) != graph_dependent_ops.end()) { + std::cout << " - requires operator graph access(unsupported)" << std::endl; + ++unsupported_ops; + } else { + std::cout << std::endl; + } RegistrationInfo regInfo; regInfo.reg = reg; regInfo.code = code; @@ -180,85 +377,174 @@ bool tflmc::Compiler::init(const void *modelData) { nodes_.push_back(NodeInfo{*node, itOp - registrations_.begin()}); } - auto runtimeAllocations = tflmc::RecordAllocations(model_, SUFFICIENT_ARENA_SIZE); - ptrdiff_t minRuntimeOffset = 0; // These are negative so zero start is fine. - for (const auto &alloc : runtimeAllocations) { - minRuntimeOffset = std::min(minRuntimeOffset, alloc.offset); + if (unsupported_ops > 0 ) { + errReporter().Report("Model includes %d unsupported operators", unsupported_ops); + return false; + } + + for (size_t i = 0; i < registrations_.size(); i++) { + std::string opName; + auto code = registrations_[i].code; + if (code == tflite::BuiltinOperator_CUSTOM) { + opName = registrations_[i].custom_name; + } else { + opName = tflite::EnumNameBuiltinOperator(code); + } + } - size_t totalRuntimeAllocSize = 0; + +#if TFLMC_USE_INTERPRETER_HOOKS + tflmc::RecordScratchBufferAllocations(interpreter_.get()); +#else + tflmc::RecordAllocations(model_, SUFFICIENT_ARENA_SIZE, SUFFICIENT_ARENA_ALIGNMENT); +#endif + auto runtimeAllocations = tflmc::RecordedAllocations(); + for (const auto &alloc : runtimeAllocations) { - // TODO: This drops the alignment between buffers. Is this fine? - totalRuntimeAllocSize += alloc.len; - ptrdiff_t offset = alloc.offset - minRuntimeOffset + ramTensorBufferSize; - memMap_.recordRAM(offset, alloc.len, - "PersistentBuf" + std::to_string(alloc.nodeIndex)); + switch( alloc.kind ) { + case tflmc::AllocKind::Persistent : + arenaMap_.recordPersistent(alloc.offset, alloc.len, + "PersistentBuf_" + std::to_string(alloc.nodeIndex)); + break; + case tflmc::AllocKind::Scratch : + arenaMap_.recordScratchBuf(alloc.buffer_index, alloc.offset, alloc.len, alloc.nodeIndex, + "ScratchBuf_" + std::to_string(alloc.nodeIndex) + "_" + std::to_string(alloc.buffer_index)); + break; + default: + assert(false && "Urecognized allocation kind"); + } + + } - // This includes: + + // At this point memMap only records the tensor arena. // - Tensors // - Scratch buffers // - Persistent buffers - // tensor metadata is not included, since we declare them outside the arena - arenaBufferSize_ = ramTensorBufferSize + totalRuntimeAllocSize; - - // TODO: This is overestimating by quite a bit because of ABI differences. - size_t tensorMetaSize = tensors_.size() * sizeof(TfLiteTensor); - size_t nodeMetaSize = nodes_.size() * sizeof(TfLiteNode); - memMap_.recordRAM(arenaBufferSize_, tensorMetaSize, "TensorMetadata"); - memMap_.recordRAM(arenaBufferSize_ + tensorMetaSize, nodeMetaSize, - "NodeMetadata"); - memMap_.recordRAM(arenaBufferSize_ + tensorMetaSize + nodeMetaSize, - sizeof(TfLiteContext), "TfLiteContext"); - - memMap_.report(); - tflmc::UnloadCustom(custom); + // Required arena size is end of ram memory usage after we have + // compacted it. 
Currently merely by stripping the largest + // gap (usual the gap between head/tail of arena) + arenaMap_.stripLargestGap(SUFFICIENT_ARENA_ALIGNMENT); + tflmc::UnloadCustom(custom); return true; } -void tflmc::Compiler::writeSource(std::ostream &out) { - CodeWriter wr(out, subgraph_); +void tflmc::Compiler::finalizeMemMap(const CodeWriter &wr) +{ + size_t tensorMetaSize = tensors_.size() * (sizeof(TfLiteTensor)+sizeof(TfLiteEvalTensor)); + uninitMemMap_.record(tensorMetaSize, "TfliteTensorTables"); - wr << R"( -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/kernels/micro_ops.h" + auto TensorInfo_t_size = + tflmc::GeneratedTensorInfo::size(!common_tensor_type.None, has_quantization, has_is_variable); + size_t tensorInfoSize = tensors_.size() * TensorInfo_t_size; + constMemMap_.record(tensorInfoSize, "TensorInfo"); -#if defined __GNUC__ -#define ALIGN(X) __attribute__((aligned(X))) -#elif defined _MSC_VER -#define ALIGN(X) __declspec(align(X)) -#elif defined __TASKING__ -#define ALIGN(X) __align(X) + initMemMap_.record(sizeof(TfLiteContext), "TfLiteContext"); + constMemMap_.record(sizeof(TfLiteContext), "TfLiteContext"); + + auto NodeInfo_t_size = tflmc::GeneratedNodeInfo::size(has_custom_ops); + size_t nodeMetaSize = nodes_.size() * NodeInfo_t_size; + constMemMap_.record(nodeMetaSize, "NodeDataTable"); + + size_t registrationsSize = registrations_.size() * sizeof(TfLiteRegistration); + initMemMap_.record(registrationsSize, "OpRegistrations"); + + constMemMap_.record(wr.constDataUsage(), "TensorAndOpdata"); + initMemMap_.record(wr.initDataUsage(), "TensorAndOpdata"); + uninitMemMap_.record(wr.uninitDataUsage(), "TensorAndOpdata"); + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + constMemMap_.record(tflite::micro::constDataUsage(), "OpUserData"); + initMemMap_.record(tflite::micro::initDataUsage(), "OpUserData"); + uninitMemMap_.record(tflite::micro::uninitDataUsage(), "OpUserData"); #endif -)"; +} + + +void tflmc::Compiler::reportMemUsage() +{ + + size_t romUsage = constMemMap_.size() + initMemMap_.size(); + std::fstream memmap_json; + auto options = Options::instance(); + if (!options.memmap_json.empty()) { + memmap_json.open(options.memmap_json, std::fstream::out); + if (!memmap_json) { + std::cerr << "Could not open '" << options.memmap_json << "' for writing." << std::endl; + exit(1); + } + + memmap_json << "{" << std::endl; + } + if (memmap_json.is_open()) { + memmap_json << "\"rodata\": " << constMemMap_.size() << "," << std::endl; + memmap_json << "\"data\": " << initMemMap_.size() << "," << std::endl; + memmap_json << "\"bss\": " << uninitMemMap_.size() << "," << std::endl; + } + std::cout << "ROM summary: "<< romUsage << " bytes total" << std::endl; + if (memmap_json.is_open()) { + memmap_json << "\"rom\": " << romUsage << "," << std::endl; + } + + size_t ramUsage = uninitMemMap_.size() + initMemMap_.size(); + if (memmap_json.is_open()) { + memmap_json << "\"ram\": " << ramUsage << std::endl; + memmap_json << "}" << std::endl; + memmap_json.close(); + if (!memmap_json) { + std::cerr << "Could not write '" << options.memmap_json << "'." 
<< std::endl; + exit(1); + } + } + + constMemMap_.report("const data (.rodata)"); + initMemMap_.report("initalized data (.data)"); + uninitMemMap_.report("uninitalized data (.bss)"); + arenaMap_.report("Tensor Arena details"); +} + + +void tflmc::Compiler::writeCustomRegistrationsSource(CodeWriter &wr) { + // declare custom registrations if (has_custom_ops) { wr << R"(namespace tflite { -namespace ops { namespace micro { )"; for (size_t i = 0; i < registrations_.size(); i++) { if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM) { - wr << "extern TfLiteRegistration Register_" + wr << "extern TfLiteRegistration *Register_" << registrations_[i].custom_name << "(void);\n"; } } wr << R"(} // namespace micro -} // namespace ops } // namespace tflite )"; } +} + + + +void tflmc::Compiler::writeTypesAndWorkingArraysSource(CodeWriter &wr) { + wr << R"(namespace { -constexpr int kTensorArenaSize = )" - << arenaBufferSize_ << R"(; -uint8_t tensor_arena[kTensorArenaSize] ALIGN(16); +)"; + + wr.writeTensorArena(arenaMap_.size()); + wr << R"( + template struct TfArray { int sz; T elem[SZ]; }; +)"; + + wr << R"( + enum used_operators_e { )"; for (size_t i = 0; i < registrations_.size(); i++) { @@ -269,43 +555,43 @@ enum used_operators_e { << ", "; } } + wr << R"( OP_LAST }; -struct TensorInfo_t { // subset of TfLiteTensor used for initialization from constant memory -)"; - if (common_tensor_type.None) { - wr << " TfLiteType type;\n"; - } - wr << R"( void* data; - TfLiteIntArray* dims; - size_t bytes; -)"; - if (has_quantization) { - wr << " TfLiteQuantization quantization;\n"; - } - if (common_tensor_is_variable.None) { - wr << " bool is_variable;\n"; - } - wr << R"(}; -struct NodeInfo_t { // subset of TfLiteNode used for initialization from constant memory - struct TfLiteIntArray* inputs; - struct TfLiteIntArray* outputs; - void* builtin_data; - used_operators_e used_op_index; + )"; - if (has_custom_ops) { - wr << " int custom_initial_data_size;\n"; - } - wr << R"(}; + + wr << + tflmc::GeneratedTensorInfo::generated(common_tensor_type.None, has_quantization, has_is_variable); + + wr << + tflmc::GeneratedNodeInfo::generated(has_custom_ops); + + wr << R"( TfLiteContext ctx{}; -TfLiteTensor tflTensors[)" - << tensors_.size() << R"(]; + +// Tensor table with space for -1-th element used +// designate missing optional inputs/outputs. 
+TfLiteTensor tflTensorsWithMinus1[)" + << tensors_.size()+1u << R"(]; + TfLiteEvalTensor evalTensors[)" << tensors_.size() << R"(]; + +TfLiteTensor * const tflTensors = tflTensorsWithMinus1+1; + TfLiteRegistration registrations[OP_LAST]; -TfLiteNode tflNodes[)" - << nodes_.size() << R"(]; +)"; + +} + + +void tflmc::Compiler::writeTflNodesSource(CodeWriter &wr) { + + wr << "constexpr size_t kOpNodesCount = " << nodes_.size() <<";\n\n"; + wr << R"( +TfLiteNode tflNodes[kOpNodesCount]; )"; for (size_t i = 0; i < tensors_.size(); i++) { @@ -315,19 +601,13 @@ TfLiteNode tflNodes[)" } wr.writeIntArray(*t->dims, "tensor_dimension" + std::to_string(i)); wr.writeQuantization(t->quantization, "quant" + std::to_string(i)); -#if TF_LITE_PACKED_QUANTIZED_DATA_VERSION - wr.writeQuantizationDetails(t->quantization, "quant_details" + std::to_string(i)); -#endif } for (size_t i = 0; i < nodes_.size(); i++) { auto &node = nodes_[i].node; auto ®Info = registrations_[nodes_[i].regIndex]; if (regInfo.code == tflite::BuiltinOperator_CUSTOM) { - wr << "uint8_t ALIGN(4) opdata" + std::to_string(i) << "[" - << node.custom_initial_data_size << "] = { "; - for (int j = 0; j < node.custom_initial_data_size; ++j) - wr << int(((uint8_t const *)node.custom_initial_data)[j]) << ", "; - wr << " }; /* custom_initial_data */\n"; + wr.writeCustom((uint8_t const *)node.custom_initial_data, i, + node.custom_initial_data_size); } else { wr.writeBuiltin(regInfo.code, node.builtin_data, "opdata" + std::to_string(i)); @@ -335,6 +615,11 @@ TfLiteNode tflNodes[)" wr.writeIntArray(*node.inputs, "inputs" + std::to_string(i)); wr.writeIntArray(*node.outputs, "outputs" + std::to_string(i)); } +} + + +void tflmc::Compiler::writeTensorDataSource(CodeWriter &wr) { + wr << R"(const TensorInfo_t tensorData[] = { )"; for (size_t i = 0; i < tensors_.size(); i++) { @@ -347,7 +632,7 @@ TfLiteNode tflNodes[)" wr << "(void*)tensor_data" << i; } else { wr << "tensor_arena + " - << ((uintptr_t)t->data.data - (uintptr_t)arena_buf_.data()); + << ((uintptr_t)t->data.data - (uintptr_t)aligned_arena_start_); } wr << ", " << "(TfLiteIntArray*)&tensor_dimension" << i << ", "; @@ -357,29 +642,29 @@ TfLiteNode tflNodes[)" wr << "{kTfLiteAffineQuantization, " "const_cast(static_cast(&quant" << i << ")) "; +#if SUPPORT_CUSTOM_QUANT + } else if (t->quantization.type == kTfLitePackedAffineQuantization) { + wr << "{kTfLitePackedAffineQuantization, " + "const_cast(static_cast(&quant" + << i << ")) "; +#endif // SUPPORT_CUSTOM_QUANT } else { wr << "{kTfLiteNoQuantization, nullptr "; } -#if TF_LITE_PACKED_QUANTIZED_DATA_VERSION - if (t->quantization.details.type == kTfLiteSub8BitPackedUniformDetail) { - wr << ", {kTfLiteSub8BitPackedUniformDetail, " - "{&quant_details" - << i << "}}"; - } else { - wr << ", {kTfLiteNoDetails, {}}"; - } -#endif + wr << "},"; } - if (common_tensor_is_variable.None) { - wr << std::to_string(t->is_variable) - << ", "; // TODO: is there a bool to string? 
+ if (has_is_variable) { + wr << t->is_variable << ", "; } wr << "},\n"; } wr << "};\n"; - wr << R"(const NodeInfo_t nodeData[] = { +} + +void tflmc::Compiler::writeNodeDataSource(CodeWriter &wr) { + wr << R"(const NodeInfo_t nodeData[kOpNodesCount] = { )"; for (size_t i = 0; i < nodes_.size(); i++) { wr << " { (TfLiteIntArray*)&inputs" << i << ", "; @@ -406,11 +691,45 @@ TfLiteNode tflNodes[)" } wr << "},\n"; } - wr << "};"; - // TODO: This code assumes that persistent allocations are made from the end - // (which is true for the current implementation) + wr << "};\n\n"; + +} + + +void tflmc::Compiler::writeScratchBufferOffsets(CodeWriter &wr) { + + // Complication: nodes with offline pre-computed user_data (OpData) + // won't actually call RequestScratchBufferInArena + // so we need to compute correct next_scratch_buffer_idx for each node + // from calls made during pre-interpretation + wr << R"( + // Used by RequestScratchBufferInArena to generate buffer index + // for each request. Reset for each node from _init to allow + // for nodes omitting calls as scratch buffer indexes is in pre-computed OpData + int next_scratch_buffer_idx; + )"; + + wr.writeArray(arenaMap_.nodesScratchBufferAllocationCounts(), sizeof(uint8_t), true, + "const uint8_t", "node_scratch_buffer_requests" + ); + wr << "\n"; + wr.writeArray(arenaMap_.scratchBufOffsets(), sizeof(size_t), true, + "const size_t", "scratchbuf_offsets"); +} + + + +void tflmc::Compiler::writeContextAllocationHandlersSource(CodeWriter &wr) { + + // We assume that persistent allocations are made from the end + // of the arena downwards. We should really have have a CI test + // to verify this explicitly but it is VERY unlikely the other + // tests will pass if tflite(u) changes this one day. + // Obviously adding support for external memory allocation + // would complicate this... + wr << R"( -static void* AllocatePersistentBuffer(struct TfLiteContext* ctx, +void *AllocatePersistentBuffer(struct TfLiteContext* ignore, size_t bytes) { static uint8_t *AllocPtr = tensor_arena + sizeof(tensor_arena); @@ -418,22 +737,152 @@ static void* AllocatePersistentBuffer(struct TfLiteContext* ctx, return AllocPtr; } -static TfLiteEvalTensor *GetEvalTensor(const struct TfLiteContext *context, +TfLiteEvalTensor *GetEvalTensor(const struct TfLiteContext *ignore, int tensor_idx) { return &evalTensors[tensor_idx]; } -} // namespace +)"; + +// Scratch buffers are "easy" - we simply re-use the allocations +// from our offline init/prepare phases. This must of course +// match the target build. Worse case same kernel library, +// target compiler settings and target compiler. +// Complication: nodes with offline pre-computed user_data (OpData) +// won't actually call RequestScratchBufferInArena +// so we record the calls each node made and corect next_scratch_buffer_idx +// from that after each prepare call. 
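Concretely, the replay relies on two tables recorded during pre-interpretation: how many scratch buffers each node requested, and the arena offset assigned to every scratch buffer index. A sketch of the replay loop (the table contents here are made up for illustration):

#include <cstddef>
#include <cstdio>

// Recorded offline: per-node request counts and per-index arena offsets.
static const unsigned char requests_per_node[] = {0, 2, 1};
static const size_t scratch_offsets[] = {1024, 2048, 4096};

int main() {
  int next_scratch_buffer_idx = 0;
  size_t precomputed_idx = 0;
  for (size_t node = 0; node < sizeof(requests_per_node); ++node) {
    // Reset before each prepare() so a node that *does* still call
    // RequestScratchBufferInArena is handed the pre-planned indices.
    next_scratch_buffer_idx = static_cast<int>(precomputed_idx);
    // ... registrations[...].prepare(&ctx, &tflNodes[node]) would run here ...
    precomputed_idx += requests_per_node[node];
  }
  std::printf("last reset index %d, total scratch buffers %zu, highest offset %zu\n",
              next_scratch_buffer_idx, precomputed_idx,
              scratch_offsets[precomputed_idx - 1]);
  return 0;
}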
-TfLiteStatus )" + wr << R"( +TfLiteStatus RequestScratchBufferInArena(TfLiteContext *ignored, + size_t bytes_ignored, + int *buffer_idx) { + *buffer_idx = next_scratch_buffer_idx; + ++next_scratch_buffer_idx; + return kTfLiteOk; +} + +void* GetScratchBuffer(struct TfLiteContext *ignore, int buffer_idx) { + return tensor_arena + scratchbuf_offsets[buffer_idx]; +} +)"; + +} + + +void tflmc::Compiler::writeMicroContextSource(CodeWriter &wr) { + + wr << R"( +class )" << prefix_ << R"(PreinterpretedMicroContext : public tflite::MicroContext { + public: + )" << prefix_ << R"(PreinterpretedMicroContext() : + tflite::MicroContext(nullptr, nullptr, nullptr) {} + + // Allocate persistent buffer which has the same life time as the interpreter. + // Returns nullptr on failure. + // The memory is allocated from the tail. + // This method is only available in Init or Prepare stage. + // Virtual so that it can be faked for kernel tests. + virtual void* AllocatePersistentBuffer(size_t bytes) { + return ::AllocatePersistentBuffer(nullptr, bytes); + } + + // Request a scratch buffer in the arena through static memory planning. + // This method is only available in Prepare stage and the buffer is allocated + // by the interpreter between Prepare and Eval stage. In Eval stage, + // GetScratchBuffer API can be used to fetch the address. + // Virtual so that it can be faked for kernel tests. + virtual TfLiteStatus RequestScratchBufferInArena(size_t bytes, + int* buffer_idx) { + return ::RequestScratchBufferInArena(nullptr, bytes, buffer_idx); + } + + // Get the scratch buffer pointer. + // This method is only available in Eval stage. + // Virtual so that it can be faked for kernel tests. + virtual void* GetScratchBuffer(int buffer_idx) { + return ::GetScratchBuffer(nullptr, buffer_idx); + } + + // Returns a temporary TfLiteTensor struct for a given index. + // Virtual so that it can be faked for kernel tests. + virtual TfLiteTensor* AllocateTempTfLiteTensor(int tensor_idx) { + return tensor_idx >= 0 ? &tflTensors[tensor_idx] : nullptr; + } + + // Returns a temporary TfLiteTensor struct for the specified input tensor of a + // given mode. This is the recommended API over the deprecated + // GetInput/GetInputSafe to get a temp input tensor. The returned tensor shall + // be freed via calling DeallocateTempTfLiteTensor. + virtual TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node, + int index) { + return AllocateTempTfLiteTensor(node->inputs->data[index]); + } + + // Returns a temporary TfLiteTensor struct for the specified output tensor of + // a given mode. This is the recommended API over the deprecated + // GetOutput/GetOutputSafe to get a temp output tensor. The returned tensor + // shall be freed via calling DeallocateTempTfLiteTensor. + virtual TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node, + int index) { + return AllocateTempTfLiteTensor(node->outputs->data[index]); + } + + // Deallocates a temp TfLiteTensor. + // Virtual so that it can be faked for kernel tests. + virtual void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { + // No-op + } + + // Returns a TfLiteEvalTensor struct for a given index. + // Virtual so that it can be faked for kernel tests. + virtual TfLiteEvalTensor* GetEvalTensor(int tensor_idx) { + return ::GetEvalTensor(nullptr, tensor_idx); + } + + + // Does not take ownership of the pointer and the pointer must refer to valid + // an object that outlive this class instance. + // This can only be called once to set one external context. 
+ TfLiteStatus set_external_context(void* external_context_payload); + + void* external_context() { return external_context_payload_; } +protected: + void* external_context_payload_ = nullptr; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +)"; + +} + + +void tflmc::Compiler::writeInitSource(CodeWriter &wr) { + + wr << R"(extern "C" TfLiteStatus )" << prefix_ << R"(init() { ctx.AllocatePersistentBuffer = &AllocatePersistentBuffer; + ctx.RequestScratchBufferInArena = &RequestScratchBufferInArena; + ctx.GetScratchBuffer = &GetScratchBuffer; ctx.GetEvalTensor = &GetEvalTensor; ctx.tensors = tflTensors; )"; wr << " ctx.tensors_size = " << tensors_.size() << ";\n"; + + wr << R"( + static )" << prefix_ << R"(PreinterpretedMicroContext u_ctx; + ctx.impl_ = static_cast(&u_ctx); +)"; + // TODO: Do we really support variable tensors? // TODO: Do we encounter other than kTfLiteMmapRo and kTfLiteArenaRw, if so we // need to store the type separately. + + wr << R"( + TfLiteIntArray dimsEmptyTensor = {0}; + tflTensors[-1].dims = &dimsEmptyTensor; + tflTensors[-1].data.raw = nullptr; +)"; wr << " for(size_t i = 0; i < " << tensors_.size() << R"(; ++i) { tflTensors[i].data.data = tensorData[i].data; evalTensors[i].data.data = tensorData[i].data; @@ -447,11 +896,10 @@ TfLiteStatus )" wr << " evalTensors[i].type = " << tflmc::to_string(common_tensor_type.Some) << ";\n"; } - if (common_tensor_is_variable.None) { + if (has_is_variable) { wr << " tflTensors[i].is_variable = tensorData[i].is_variable;\n"; } else { - wr << " tflTensors[i].is_variable = " - << std::to_string(common_tensor_is_variable.Some) << ";\n"; + wr << " tflTensors[i].is_variable = false;\n"; } wr << R"( tflTensors[i].allocation_type = (tensor_arena <= tensorData[i].data && tensorData[i].data < tensor_arena + kTensorArenaSize) ? 
kTfLiteArenaRw : kTfLiteMmapRo; tflTensors[i].bytes = tensorData[i].bytes; @@ -464,6 +912,10 @@ TfLiteStatus )" TfLiteAffineQuantization const* quant = ((TfLiteAffineQuantization const*)(tensorData[i].quantization.params)); tflTensors[i].params.scale = quant->scale->data[0]; tflTensors[i].params.zero_point = quant->zero_point->data[0]; + } else if (tflTensors[i].quantization.type == kTfLitePackedAffineQuantization) { + TfLitePackedAffineQuantization const* quant = (TfLitePackedAffineQuantization const*)(tensorData[i].quantization.params); + tflTensors[i].params.scale = quant->affine.scale->data[0]; + tflTensors[i].params.zero_point = quant->affine.zero_point->data[0]; } )"; } else { @@ -471,18 +923,25 @@ TfLiteStatus )" } wr << R"( } )"; + for (size_t i = 0; i < registrations_.size(); i++) { std::string opName; - if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM) { + auto code = registrations_[i].code; + if (code == tflite::BuiltinOperator_CUSTOM) { opName = registrations_[i].custom_name; } else { - opName = tflite::EnumNameBuiltinOperator(registrations_[i].code); + opName = tflite::EnumNameBuiltinOperator(code); } - wr << " registrations[OP_" << opName << "] = tflite::ops::micro::Register_" - << opName << "();\n"; } wr << "\n"; - wr << " for(size_t i = 0; i < " << nodes_.size() << R"(; ++i) { +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + wr << R"( +#if TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +tflite::micro::resetOfflineOpUserData( tflite::micro::)" << prefix_ << R"(model::precomputed_op_user_data); +#endif // TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +)"; +#endif + wr << R"( for(size_t i = 0; i < kOpNodesCount; ++i) { tflNodes[i].inputs = nodeData[i].inputs; tflNodes[i].outputs = nodeData[i].outputs; tflNodes[i].builtin_data = nodeData[i].builtin_data; @@ -499,45 +958,129 @@ TfLiteStatus )" } } )"; - wr << " for(size_t i = 0; i < " << nodes_.size() << R"(; ++i) { + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + wr << R"( +#if TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +tflite::micro::resetOfflineOpUserData( tflite::micro::)" << prefix_ << R"(model::precomputed_op_user_data); +#endif // TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +)"; +#endif + + wr << R"( size_t precomputed_sb_idx_ctr = 0; + + for(size_t i = 0; i < kOpNodesCount; ++i) { + next_scratch_buffer_idx = precomputed_sb_idx_ctr; if (registrations[nodeData[i].used_op_index].prepare) { TfLiteStatus status = registrations[nodeData[i].used_op_index].prepare(&ctx, &tflNodes[i]); if (status != kTfLiteOk) { return status; } } + precomputed_sb_idx_ctr += node_scratch_buffer_requests[i]; } return kTfLiteOk; } +)"; -static const int inTensorIndices[] = { - )"; - for (auto inIndex : inputTensorIndices_) { - out << inIndex << ", "; - } - out << R"( -}; -TfLiteTensor* )" - << prefix_ << R"(input(int index) { - return &ctx.tensors[inTensorIndices[index]]; } -static const int outTensorIndices[] = { - )"; // TODO: perhaps use a smaller type than int? 
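The allocation_type assignment in the generated init() above boils down to a half-open address-range test: anything whose data pointer lies inside the tensor arena is kTfLiteArenaRw, everything else (flatbuffer-resident constants) is kTfLiteMmapRo. The check in isolation, using integer comparisons to keep it well defined:

#include <cstdint>
#include <cstdio>

static uint8_t tensor_arena[256];
static const uint8_t const_weights[16] = {};

static bool InArena(const void *p) {
  const uintptr_t a = reinterpret_cast<uintptr_t>(tensor_arena);
  const uintptr_t v = reinterpret_cast<uintptr_t>(p);
  return v >= a && v < a + sizeof(tensor_arena);  // [arena, arena + size)
}

int main() {
  std::printf("arena+8 -> %s\n", InArena(tensor_arena + 8) ? "kTfLiteArenaRw" : "kTfLiteMmapRo");
  std::printf("weights -> %s\n", InArena(const_weights) ? "kTfLiteArenaRw" : "kTfLiteMmapRo");
  return 0;
}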
- for (auto outIndex : outputTensorIndices_) { - out << outIndex << ", "; + +void tflmc::Compiler::writeTensorAccessorsSource(CodeWriter &wr) { + wr << R"( +extern "C" TfLiteTensor* )" + << prefix_ << R"(input(int index) { + static const int inTensorIndices[] = { + )"; + for (auto inIndex : inputTensorIndices_) { + wr << inIndex << ", "; + } + wr << R"( + }; + return &ctx.tensors[inTensorIndices[index]]; } - out << R"( -}; -TfLiteTensor* )" - << prefix_ << R"(output(int index) { - return &ctx.tensors[outTensorIndices[index]]; + +extern "C" TfLiteTensor* )" + << prefix_ << R"(output(int index) { + static const int outTensorIndices[] = { + )"; // TODO: perhaps use a smaller type than int? + for (auto outIndex : outputTensorIndices_) { + wr << outIndex << ", "; + } + wr << R"( + }; + return &ctx.tensors[outTensorIndices[index]]; + } + )"; + + + std::string code = R"( + +// Returns the number of input tensors. +extern "C" size_t %PREFIX%inputs() { + return )" + std::to_string(inputTensorIndices_.size()) + + R"(; +} +// Returns the number of output tensors. +extern "C" size_t %PREFIX%outputs() { + return )" + std::to_string(outputTensorIndices_.size()) + + R"(; +} + +extern "C" void *%PREFIX%input_ptr(int index) { + return %PREFIX%input(index)->data.data; +} +extern "C" size_t %PREFIX%input_size(int index) { + return %PREFIX%input(index)->bytes; +} +extern "C" int %PREFIX%input_dims_len(int index) { + return %PREFIX%input(index)->dims->size; +} +extern "C" int *%PREFIX%input_dims(int index) { + return &%PREFIX%input(index)->dims->data[0]; +} + +extern "C" void *%PREFIX%output_ptr(int index) { + return %PREFIX%output(index)->data.data; +} +extern "C" size_t %PREFIX%output_size(int index) { + return %PREFIX%output(index)->bytes; +} +extern "C" int %PREFIX%output_dims_len(int index) { + return %PREFIX%output(index)->dims->size; +} +extern "C" int *%PREFIX%output_dims(int index) { + return &%PREFIX%output(index)->dims->data[0]; +} + +)"; + + static std::regex rePrefix("%PREFIX%"); + code = std::regex_replace(code, rePrefix, prefix_); + + wr << code; + } -TfLiteStatus )" + +void tflmc::Compiler::writeInvokeSource(CodeWriter &wr) { + wr << R"( + +extern "C" TfLiteStatus )" << prefix_ << R"(invoke() { - for(size_t i = 0; i < )" - << nodes_.size() << R"(; ++i) { +)"; +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + wr << R"( +#if TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +tflite::micro::resetOfflineOpUserData( tflite::micro::)" << prefix_ << R"(model::precomputed_op_user_data); +#endif // TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +)"; +#endif + wr << R"( + for(size_t i = 0; i < kOpNodesCount; ++i) { +#if LOG_OP_INPUTS + tflite::logOpInvoke(&ctx, &tflNodes[i]); +#endif TfLiteStatus status = registrations[nodeData[i].used_op_index].invoke(&ctx, &tflNodes[i]); if (status != kTfLiteOk) { return status; @@ -545,11 +1088,92 @@ TfLiteStatus )" } return kTfLiteOk; } + +)"; +} + + +void tflmc::Compiler::writeSource(std::ostream &out) { + CodeWriter wr(out, subgraph_, microErrReporter_); + + wr << R"( +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/micro_ops.h" +#include "tensorflow/lite/micro/compatibility.h" +#include "tensorflow/lite/micro/micro_context.h" +#if LOG_OP_INPUTS +#include "tensorflow/lite/micro/micro_invoke_log.h" +#endif +)"; + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + wr << R"( +#if TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +#include "tensorflow/lite/micro/kernels/ifx_common/offline_prepare_utils.h" +#endif // 
TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +)"; +#endif + + wr << R"( + +#if defined __GNUC__ +#define ALIGN(X) __attribute__((aligned(X))) +#elif defined _MSC_VER +#define ALIGN(X) __declspec(align(X)) +#elif defined __TASKING__ +#define ALIGN(X) __align(X) +#endif + +)"; + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + wr << "#if TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA\n"; + tflite::micro::writeStaticOpDataHeaders(out); + wr << "#endif // TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA\n"; +#endif + + writeCustomRegistrationsSource(wr); + + writeTypesAndWorkingArraysSource(wr); + + writeTflNodesSource(wr); + + writeTensorDataSource(wr); + + writeNodeDataSource(wr); + + writeScratchBufferOffsets(wr); + + writeContextAllocationHandlersSource(wr); + +// TODO: Really need to support AllocateBufferForEval. Should be easy - just need to +// permit allocating a suitable "gap" in the arena or a dedicated scratchpad area. + +wr << R"( +} // namespace )"; + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + wr << "#if TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA\n"; + tflite::micro::writeStaticOpDataDefinitions(prefix_, out); + wr << "#endif // TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA\n"; +#endif + + writeMicroContextSource(wr); + + writeInitSource(wr); + + writeTensorAccessorsSource(wr); + + writeInvokeSource(wr); + + finalizeMemMap(wr); } + void tflmc::Compiler::writeHeader(std::ostream &out) { - tflmc::CodeWriter wr(out, subgraph_); + tflmc::CodeWriter wr(out, subgraph_, errReporter()); std::string code = R"( #ifndef %PREFIX%GEN_H @@ -557,6 +1181,18 @@ void tflmc::Compiler::writeHeader(std::ostream &out) { #include "tensorflow/lite/c/common.h" +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define %PREFIX%MODEL_CONST_DATA_SIZE )"+ std::to_string(constMemMap_.size()) + + R"( +#define %PREFIX%MODEL_INIT_DATA_SIZE )"+ std::to_string(initMemMap_.size()) + + R"( +#define %PREFIX%MODEL_UNINIT_DATA_SIZE )"+ std::to_string(uninitMemMap_.size()) + + R"( + + // Sets up the model with init and prepare steps. TfLiteStatus %PREFIX%init(); // Returns the input tensor with the given index. @@ -567,41 +1203,38 @@ TfLiteTensor *%PREFIX%output(int index); TfLiteStatus %PREFIX%invoke(); // Returns the number of input tensors. -inline size_t %PREFIX%inputs() { - return )" + std::to_string(inputTensorIndices_.size()) + - R"(; -} +size_t %PREFIX%inputs(); + // Returns the number of output tensors. 
-inline size_t %PREFIX%outputs() { - return )" + std::to_string(outputTensorIndices_.size()) + - R"(; -} +size_t %PREFIX%outputs(); -inline void *%PREFIX%input_ptr(int index) { - return %PREFIX%input(index)->data.data; -} -inline size_t %PREFIX%input_size(int index) { - return %PREFIX%input(index)->bytes; -} -inline int %PREFIX%input_dims_len(int index) { - return %PREFIX%input(index)->dims->data[0]; -} -inline int *%PREFIX%input_dims(int index) { - return &%PREFIX%input(index)->dims->data[1]; -} +// Return the buffer pointer of input tensor +void *%PREFIX%input_ptr(int index); -inline void *%PREFIX%output_ptr(int index) { - return %PREFIX%output(index)->data.data; -} -inline size_t %PREFIX%output_size(int index) { - return %PREFIX%output(index)->bytes; -} -inline int %PREFIX%output_dims_len(int index) { - return %PREFIX%output(index)->dims->data[0]; -} -inline int *%PREFIX%output_dims(int index) { - return &%PREFIX%output(index)->dims->data[1]; -} +// Return the buffer size of input tensor +size_t %PREFIX%input_size(int index); + +// Return the dimention size of input tensor +int %PREFIX%input_dims_len(int index); + +// Return the dimention buffer pointer of input tensor +int *%PREFIX%input_dims(int index); + +// Return the buffer pointer of output tensor +void *%PREFIX%output_ptr(int index); + +// Return the buffer size of output tensor +size_t %PREFIX%output_size(int index); + +// Return the dimention size of output tensor +int %PREFIX%output_dims_len(int index); + +// Return the dimention buffer pointer of output tensor +int *%PREFIX%output_dims(int index); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus #endif )"; @@ -620,7 +1253,7 @@ std::string tflmc::Compiler::getTensorName(int tensorIndex) const { auto nOps = interpreter_->operators_size(); for (size_t i = 0; i < nOps; i++) { - auto nodeAndReg = interpreter_->node_and_registration(i); + auto nodeAndReg = interpreter_->node_and_registration(ILLEGAL_IF_EVER_MULTIPLE_SUBGRAPH,i); auto node = &nodeAndReg.node; auto checkAndAdd = [&](const TfLiteIntArray *indices, @@ -642,3 +1275,7 @@ std::string tflmc::Compiler::getTensorName(int tensorIndex) const { return ss.str(); } + +bool tflmc::Compiler::noErrorsReported() const { + return ! microErrReporter_.getErrorReported(); +} diff --git a/src/Compiler.h b/src/Compiler.h index 3a69b27..b2ff90f 100644 --- a/src/Compiler.h +++ b/src/Compiler.h @@ -4,16 +4,20 @@ #include #include "MemMap.h" +#include "ModelInfo.h" #include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" +#include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/micro/micro_interpreter.h" #include "tensorflow/lite/schema/schema_generated.h" namespace tflmc { -bool CompileFile(const std::string &modelFileName, - const std::string &outFileName, - const std::string &prefix = "model_"); +class CodeWriter; + +bool CompileFile(const std::string &modelPathName, + const std::string &outSrcPathName, + const std::string &outHdrPathName, + const std::string &prefix); class Compiler { public: @@ -23,67 +27,74 @@ class Compiler { void writeSource(std::ostream &out); void writeHeader(std::ostream &out); - + void reportMemUsage(); + // Returns a name that describes a tensors relation to network layers. 
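Both the accessor emitter and writeHeader() keep a single template string and stamp the user-chosen naming prefix into every %PREFIX% placeholder with one std::regex_replace call. The substitution in isolation, assuming a prefix of "net1_":

#include <iostream>
#include <regex>
#include <string>

int main() {
  std::string code = R"(TfLiteStatus %PREFIX%init();
TfLiteTensor *%PREFIX%input(int index);
TfLiteStatus %PREFIX%invoke();)";
  static const std::regex rePrefix("%PREFIX%");
  std::cout << std::regex_replace(code, rePrefix, "net1_") << std::endl;
  return 0;
}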
std::string getTensorName(int tensorIndex) const; + bool noErrorsReported() const; + private: bool init(const void *modelData); tflite::ErrorReporter &errReporter() { return microErrReporter_; } + void writeCustomRegistrationsSource(CodeWriter &wr); + + void writeTflNodesSource(CodeWriter &wr); + + void writeTensorDataSource(CodeWriter &wr); + + void writeTypesAndWorkingArraysSource(CodeWriter &wr); + + void writeNodeDataSource(CodeWriter &wr); + + void writeScratchBufferOffsets(CodeWriter &wr); + + void writeContextAllocationHandlersSource(CodeWriter &wr); + + void writeMicroContextSource(CodeWriter &wr); + + void writeInitSource(CodeWriter &wr); + + void writeTensorAccessorsSource(CodeWriter &wr); + + void writeInvokeSource(CodeWriter &wr); + + void finalizeMemMap(const CodeWriter &wr); + private: - struct TensorInfo { - TensorInfo(const TfLiteTensor *tensor_ptr) : - tensor(tensor_ptr) - {} - const TfLiteTensor *tensor = nullptr; - }; - struct RegistrationInfo { - const TfLiteRegistration *reg = nullptr; - tflite::BuiltinOperator code; - std::string custom_name; - bool operator==(const RegistrationInfo &other) { - if (code != other.code) return false; - if (code == tflite::BuiltinOperator_CUSTOM) { - return custom_name == other.custom_name; - } else - return true; - } - }; - struct NodeInfo { - NodeInfo() {} - NodeInfo(TfLiteNode tfl_node, ptrdiff_t reg_index) : - node(tfl_node), - regIndex(reg_index) - {} - TfLiteNode node; - ptrdiff_t regIndex = -1; - }; - template - struct Option { - bool None = true; - T Some = T(); - void operator=(T const &val) { - None = false; - Some = val; - } - void clear() { - Some = T(); - None = true; - } + + /** + * @brief Error reporter that tracks if Error was reported. + * + */ + class TrackingErrorReporter : public tflite::ErrorReporter { + public: + + ~TrackingErrorReporter() {} + int Report(const char* format, va_list args) override; + + bool getErrorReported() const { return error_reported_; } + + private: + + bool error_reported_ = false; }; - private: std::string prefix_; - tflite::MicroErrorReporter microErrReporter_; + TrackingErrorReporter microErrReporter_; const tflite::Model *model_ = nullptr; const tflite::SubGraph *subgraph_ = nullptr; tflite::AllOpsResolver resolver_; - std::vector arena_buf_; + SufficientArena arena_; + uint8_t *aligned_arena_start_; + size_t arena_size_; std::unique_ptr interpreter_; - MemMap memMap_; + ArenaMemMap arenaMap_; + MemMap initMemMap_; + MemMap uninitMemMap_; + MemMap constMemMap_; - size_t arenaBufferSize_ = 0; std::vector tensors_; std::vector registrations_; std::vector nodes_; @@ -93,7 +104,7 @@ class Compiler { bool has_custom_ops = false; bool has_quantization = false; Option common_tensor_type; - Option common_tensor_is_variable; + bool has_is_variable = false; }; } // namespace tflmc diff --git a/src/CustomOperators.cc b/src/CustomOperators.cc index d5ae115..3d5859b 100644 --- a/src/CustomOperators.cc +++ b/src/CustomOperators.cc @@ -15,20 +15,19 @@ limitations under the License. 
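The Report() override of TrackingErrorReporter is not part of this hunk; the intent is simply to latch a flag whenever any error is reported so the driver can fail the run. A plausible shape, sketched against a local stand-in base class so it compiles on its own (the real class derives from tflite::ErrorReporter):

#include <cstdarg>
#include <cstdio>

class Reporter {                       // stand-in for tflite::ErrorReporter
 public:
  virtual ~Reporter() {}
  virtual int Report(const char *format, va_list args) = 0;
};

class TrackingReporter : public Reporter {
 public:
  int Report(const char *format, va_list args) override {
    error_reported_ = true;            // remember that something went wrong
    return std::vfprintf(stderr, format, args);
  }
  bool getErrorReported() const { return error_reported_; }

 private:
  bool error_reported_ = false;
};

static int Emit(Reporter &r, const char *fmt, ...) {
  va_list args;
  va_start(args, fmt);
  const int n = r.Report(fmt, args);
  va_end(args);
  return n;
}

int main() {
  TrackingReporter rep;
  Emit(rep, "arena too small by %d bytes\n", 128);
  return rep.getErrorReported() ? 1 : 0;
}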
#include "CustomOperators.h" -#include - -#include // dynamic loading for custom operators -#ifndef _WIN32 +#ifdef LINUX +#include +#include #include tflmc::custom_operator_handle tflmc::LoadCustom( - tflite::AllOpsResolver *resolver) { + tflite::MicroOpResolver *resolver) { const char *filename = "./libtflite_micro_custom.so"; void *custom_lib = dlopen(filename, RTLD_NOW); if (custom_lib) { - TfLiteStatus (*reg_fun)(tflite::AllOpsResolver * res); + TfLiteStatus (*reg_fun)(tflite::MicroOpResolver * res); // see "man dlopen" for an explanation of this nasty construct *(void **)(®_fun) = dlsym(custom_lib, "register_custom"); char *error = dlerror(); @@ -53,9 +52,13 @@ void tflmc::UnloadCustom(tflmc::custom_operator_handle custom_lib) { } #else -// anyone interested in implementing this for Windows (LoadLibrary+GetProcAddr) +// Obviously, no chance of loading shared lib on semi-hosted embedded builds +// of pre-interpeter. +// TODO: could it work on user-space hosted execution on qemu? Attractive option... +// as stuff like command-line args ought to work correctly. +// TODO: anyone interested in implementing this for Windows (LoadLibrary+GetProcAddr) tflmc::custom_operator_handle tflmc::LoadCustom( - tflite::AllOpsResolver *resolver) { + tflite::MicroOpResolver *resolver) { return nullptr; } diff --git a/src/CustomOperators.h b/src/CustomOperators.h index 3d7f60e..ff5383b 100644 --- a/src/CustomOperators.h +++ b/src/CustomOperators.h @@ -21,7 +21,7 @@ limitations under the License. namespace tflmc { typedef void* custom_operator_handle; -custom_operator_handle LoadCustom(tflite::AllOpsResolver* res); +custom_operator_handle LoadCustom(tflite::MicroOpResolver* res); void UnloadCustom(custom_operator_handle); } // namespace tflmc diff --git a/src/Makefile.inc b/src/Makefile.inc index eb5d7f3..48d1730 100644 --- a/src/Makefile.inc +++ b/src/Makefile.inc @@ -1,12 +1,11 @@ -$(info Adding tflite-micro compiler) + TFLITE_U_COMPILER_SRCS := \ src/CodeWriter.cc src/CustomOperators.cc src/MemMap.cc src/TypeToString.cc \ - src/Compiler.cc src/main.cc src/RecordAllocations.cc + src/Compiler.cc src/main.cc src/RecordAllocations.cc src/BuiltinAllocations.cc TFLITE_U_COMPILER_HDRS := \ - src/CodeWriter.h src/Compiler.h src/CustomOperators.h src/MemMap.h src/RecordAllocations.h src/TypeToString.h - - + src/CodeWriter.h src/Compiler.h src/CustomOperators.h src/MemMap.h src/RecordAllocations.h \ + src/TypeToString.h src/BuiltinAllocations.h src/ModelInfo.h # Builds a standalone binary. 
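The "nasty construct" referenced in the comment is the POSIX-recommended way of turning the void* returned by dlsym() into a function pointer without a direct (formally undefined) cast. The same pattern in a self-contained form, against a purely hypothetical libexample.so exporting example_entry:

#include <dlfcn.h>   // POSIX only; link with -ldl
#include <cstdio>

int main() {
  void *lib = dlopen("./libexample.so", RTLD_NOW);       // hypothetical library
  if (!lib) {
    std::fprintf(stderr, "dlopen: %s\n", dlerror());
    return 1;
  }
  int (*entry)(int) = nullptr;
  // Copy the symbol address into the function-pointer object byte for byte.
  *(void **)(&entry) = dlsym(lib, "example_entry");      // hypothetical symbol
  if (const char *err = dlerror()) {
    std::fprintf(stderr, "dlsym: %s\n", err);
  } else {
    std::printf("example_entry(2) = %d\n", entry(2));
  }
  dlclose(lib);
  return 0;
}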
diff --git a/src/MemMap.cc b/src/MemMap.cc index 6a08945..d8eaab8 100644 --- a/src/MemMap.cc +++ b/src/MemMap.cc @@ -1,24 +1,163 @@ #include "MemMap.h" +#include "Options.h" +#include +#include +#include +#include +#include -void tflmc::MemMap::recordROM(ptrdiff_t offset, size_t len, + +tflmc::SufficientArena::SufficientArena( size_t sufficient_size, size_t sufficient_alignment) +{ + size_t padded_size = sufficient_size + 2*sufficient_alignment; + arena_buf.resize(padded_size); + void *arena_start = arena_buf.data(); + aligned_start_ = + static_cast( + std::align(sufficient_alignment, sufficient_alignment, arena_start, padded_size)); + assert( aligned_start_!=nullptr && "Arena alignment failed"); +} + +tflmc::MemMap::MemMap() + : m_total(0) +{ +} + + + +void tflmc::MemMap::record(size_t len, const std::string &tag) { - m_romEntries.push_back({offset, len, tag}); + if (len > 0) { + m_entries.push_back({m_total, len, tag}); + m_total += len; + } +} + +tflmc::ArenaMemMap::ArenaMemMap() +{ +} + + +void tflmc::ArenaMemMap::init(size_t model_op_count) { + m_node_scratchbuf_counts.resize(model_op_count,0); } -void tflmc::MemMap::recordRAM(ptrdiff_t offset, size_t len, +void tflmc::ArenaMemMap::recordPersistent(ptrdiff_t offset, size_t len, const std::string &tag) { - m_ramEntries.push_back({offset, len, tag}); + m_entries.push_back({offset, len, tag}); + updateUsedList(offset, len); +} + +void tflmc::ArenaMemMap::recordScratchBuf(int idx, + ptrdiff_t offset, size_t len, + size_t allocating_node, + const std::string &tag) { + m_scratchbuf_map[idx] = m_entries.size(); + recordPersistent(offset, len, tag); + m_node_scratchbuf_counts[allocating_node] += 1; +} + +std::vector tflmc::ArenaMemMap::scratchBufOffsets() { + std::vector res; + for( auto &sb : m_scratchbuf_map ) + { + assert(sb.first >= 0); + size_t req_sb_table_size = sb.first+1; + res.resize(std::max(req_sb_table_size,res.size())); + res[sb.first] = m_entries[sb.second].base; + } + + return res; +} + +void tflmc::ArenaMemMap::updateUsedList(ptrdiff_t used_begin, size_t used_len) { + + ptrdiff_t used_end = used_begin + used_len; + std::vector to_delete; + auto overlapped_i = m_usedList.lower_bound(used_begin); + // Fuse used block overlapping on left + if (overlapped_i != m_usedList.begin()) + { + --overlapped_i; + if (overlapped_i->second >= used_begin) { + used_begin = overlapped_i->first; + } else { + ++overlapped_i; + } + } + + + auto end_i = m_usedList.upper_bound(used_end); + + // Fuse used blocks overlapped completely or on + // right. + while(overlapped_i != end_i) { + // Invariant: overlapped_i->first >= used_begin + // Invariant: overlapped_i->first < used_end + to_delete.push_back(overlapped_i->first); + used_end = std::max(overlapped_i->second, used_end); + ++overlapped_i; + } + + // Fuse ... + for (auto k : to_delete) { + m_usedList.erase(k); + } + m_usedList[used_begin] = used_end; +} + +void tflmc::ArenaMemMap::stripLargestGap(size_t alignment_to_maintain) { + + // Find largest gap between used blocks. + auto used_i = m_usedList.begin(); + ptrdiff_t prev_end = 0; + ptrdiff_t max_gap_size = 0; + ptrdiff_t max_gap_begin = 0; + while( used_i != m_usedList.end() ) { + auto cur_begin = used_i->first; + auto cur_gap_size = cur_begin-prev_end; + if (cur_gap_size > max_gap_size) { + max_gap_size = cur_gap_size; + max_gap_begin = prev_end; + } + prev_end = used_i->second; + ++used_i; + } + + // Adjust RAM entries to strip out largest gap... 
we get a little sneaky to maintain + // alginment by adjusting the gap_size so sufficient alignment is maintained. + max_gap_size = max_gap_size / alignment_to_maintain * alignment_to_maintain; + for (auto &entry : m_entries ) + { + if (entry.base > max_gap_begin) { + assert( entry.base >= max_gap_begin+max_gap_size); + entry.base -= max_gap_size; + } + } + } +size_t tflmc::ArenaMemMap::size() const { + + ptrdiff_t max_end = 0; + for (auto &entry : m_entries ) + { + max_end = std::max(static_cast(entry.base+entry.len), max_end); + } + return max_end; +} + + + static void PrintBar(const std::string &label, float start, float end) { static const int BAR_WIDTH = 100; static const int TEXT_LABEL_START = 3; if (start == -1.0f) { for (int i = 0; i < BAR_WIDTH + 2; i++) { - printf("#"); + std::cout << '#'; } - printf("\n"); + std::cout << std::endl; return; } @@ -44,42 +183,33 @@ static void PrintBar(const std::string &label, float start, float end) { } } - printf("#"); + std::cout << '#'; for (int i = 0; i < BAR_WIDTH; i++) { if (i >= labelStart && i < labelEnd) { - printf("%c", label[i - labelStart]); + std::cout << label[i - labelStart]; } else if (i >= barStart && i < barEnd) { - printf(smallBar ? "|" : "X"); + std::cout << (smallBar ? "|" : "X"); } else { - printf("."); + std::cout << '.'; } } - printf("#\n"); + std::cout << '#' << std::endl; } -void tflmc::MemMap::report() const { - size_t constSize = 0; - size_t arenaSize = 0; - for (const auto &entry : m_romEntries) { - constSize = std::max(constSize, entry.base + entry.len); - } - for (const auto &entry : m_ramEntries) { - arenaSize = std::max(arenaSize, entry.base + entry.len); - } - printf("ROM summary: %lu bytes total\n", constSize); - PrintBar("", -1.0f, -1.0f); - for (const auto &entry : m_romEntries) { - PrintBar(entry.tag, entry.base / (float)constSize, - (entry.base + entry.len) / (float)constSize); - } - PrintBar("", -1.0f, -1.0f); +void tflmc::MemMap::report(const char *label) const { + tflmc::Options &options = tflmc::Options::instance(); + size_t usage = size(); - printf("RAM summary: %lu bytes total\n", arenaSize); - PrintBar("", -1.0f, -1.0f); - for (const auto &entry : m_ramEntries) { - PrintBar(entry.tag, entry.base / (float)arenaSize, - (entry.base + entry.len) / (float)arenaSize); + + std::cout << label << " summary: " < #include +#include #include namespace tflmc { + + +struct SufficientArena +{ +public: + SufficientArena( size_t sufficient_size, size_t sufficient_alignment); + + uint8_t *alginedBufferStart() { return aligned_start_; } +protected: + std::vector arena_buf; + uint8_t *aligned_start_; + +}; + + // Keeps track of buffers and prints a summary. class MemMap { public: - void recordROM(ptrdiff_t offset, size_t len, const std::string &tag); - void recordRAM(ptrdiff_t offset, size_t len, const std::string &tag); - void report() const; - private: + MemMap(); + + /** + * @brief Record allocated memory section + * + * Primarily these will be data from constant tensors or constant tensor meta-data + * Since ROM cannot be re-used assumged location is simply counted internally + * hence no offset parameter. 
+ * + * @param len Allocated size (may no account for alignment padding) + * @param tag identifying tag for diagnostic/analytic output + */ + void record(size_t len, const std::string &tag); + + void report(const char *label) const; + + + virtual size_t size() const { return m_total; } + + protected: + struct Entry { ptrdiff_t base; size_t len; std::string tag; }; - std::vector m_romEntries; - std::vector m_ramEntries; + + std::vector m_entries; + + ptrdiff_t m_total; +}; + + +// Keeps track of buffers and prints a summary. +class ArenaMemMap : public MemMap +{ + public: + + ArenaMemMap(); + + /** + * @brief Initialize per-op tables (scratch buffer offset etc) + * + * This can't be done at construction time as number of ops is not + * available to pre-intreter "Compiler" sub-object construction time + * (tflite interpreter has yet to be created). + */ + + void init(size_t model_op_count); + + /** + * @brief Record persistent tensor arena-allocatio). + * + * Primarily these will be persistent data buffers for intermediate + * tensor values. Due to differing lifetimes it is quite legal/normal + * for these to overlap. + * @param offset Starting offset in tensor arena + * @param len Length in bytes + * @param tag identifying tag for diagnoistic/analytic output. + */ + void recordPersistent(ptrdiff_t offset, size_t len, const std::string &tag); + + /** + * @brief Record scatch tensor area-allocatino) + * + * Scratch buffers (buffers allocated only for the duration of a single operator + * evaluation) are handled seperately from longer-lived tensor arena allocations + * (intermediate-value tensors and persistent buffers). Presumably this + * to minimize number items processed by the full (expensive) memory allocation algorithm. + * + * + * @param idx Scratch buffer index (handle) + * @param offset Starting offset in tensor arena + * @param len Buffer length in bytes + * @param tag identifying tag for diagnoistic/analytic output. + */ + + void recordScratchBuf(int idx, ptrdiff_t offset, size_t len, size_t allocating_node, const std::string &tag); + + std::vector scratchBufOffsets(); + + typedef std::vector scratchbuf_counts_map_t; + inline const scratchbuf_counts_map_t &nodesScratchBufferAllocationCounts() { + return m_node_scratchbuf_counts; + } + + void stripLargestGap(size_t alginment_to_maintain); + virtual size_t size() const; + + private: + + void updateUsedList(ptrdiff_t used_base, size_t used_len); + + // [begin,end) of unused memory sections + typedef std::map occupancy_map_t; + occupancy_map_t m_usedList; + + // Table of RAM allocations associated with scratchbufs. + typedef std::map scratchbuf_map_t; + scratchbuf_map_t m_scratchbuf_map; + + // Table of number of scratch buffers assigned by each node. + // This is needed to correctly assign scratch buffer indexes in the + // prepare phase for nodes that do use statically code-generated user_data OpData. 
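updateUsedList() keeps the occupancy map as disjoint [begin,end) intervals keyed by their begin offset and fuses every block a new allocation overlaps or touches. The merge step can be written compactly as a free function over the same map type (a sketch, not the member itself):

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <iterator>
#include <map>

using UsedMap = std::map<std::ptrdiff_t, std::ptrdiff_t>;  // begin -> end

void MarkUsed(UsedMap &used, std::ptrdiff_t begin, std::ptrdiff_t end) {
  auto it = used.lower_bound(begin);
  if (it != used.begin()) {            // block just left of us may overlap/touch
    auto prev = std::prev(it);
    if (prev->second >= begin) {
      begin = prev->first;
      end = std::max(end, prev->second);
      it = prev;
    }
  }
  while (it != used.end() && it->first <= end) {   // absorb everything we cover
    end = std::max(end, it->second);
    it = used.erase(it);
  }
  used[begin] = end;
}

int main() {
  UsedMap used;
  MarkUsed(used, 0, 64);
  MarkUsed(used, 128, 192);
  MarkUsed(used, 32, 160);             // bridges the gap -> one interval [0,192)
  for (const auto &e : used) std::printf("[%td, %td)\n", e.first, e.second);
  return 0;
}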
+ + scratchbuf_counts_map_t m_node_scratchbuf_counts; }; } // namespace tflmc diff --git a/src/ModelInfo.h b/src/ModelInfo.h new file mode 100644 index 0000000..ef7e546 --- /dev/null +++ b/src/ModelInfo.h @@ -0,0 +1,52 @@ +#ifndef TFLMCOMPILER_MODELINFO_H +#define TFLMCOMPILER_MODELINFO_H + +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/schema/schema_generated.h" + +namespace tflmc { + +struct TensorInfo { + TensorInfo(const TfLiteTensor *tensor_ptr) : + tensor(tensor_ptr) + {} + const TfLiteTensor *tensor = nullptr; +}; +struct RegistrationInfo { + const TfLiteRegistration *reg = nullptr; + tflite::BuiltinOperator code; + std::string custom_name; + bool operator==(const RegistrationInfo &other) { + if (code != other.code) return false; + if (code == tflite::BuiltinOperator_CUSTOM) { + return custom_name == other.custom_name; + } else + return true; + } +}; +struct NodeInfo { + NodeInfo() {} + NodeInfo(TfLiteNode tfl_node, ptrdiff_t reg_index) : + node(tfl_node), + regIndex(reg_index) + {} + TfLiteNode node; + ptrdiff_t regIndex = -1; +}; +template +struct Option { + bool None = true; + T Some = T(); + void operator=(T const &val) { + None = false; + Some = val; + } + void clear() { + Some = T(); + None = true; + } +}; + +} // namespace tflmc + +#endif // TFLMCOMPILER_MODELINFO_H diff --git a/src/Options.h b/src/Options.h new file mode 100644 index 0000000..e3a4b02 --- /dev/null +++ b/src/Options.h @@ -0,0 +1,23 @@ +#ifndef TFLMCOMPILER_OPTIONS_H +#define TFLMCOMPILER_OPTIONS_H + +#include + +namespace tflmc { +class Options +{ +private: + Options() {} +public: + bool verbose = false; + std::string memmap_json; + + static Options &instance() { + static Options options; + return options; + } +}; + +} + +#endif // TFLMCOMPILER_OPTIONS_H diff --git a/src/RecordAllocations.cc b/src/RecordAllocations.cc index dc7b3ce..e49f4c8 100644 --- a/src/RecordAllocations.cc +++ b/src/RecordAllocations.cc @@ -1,47 +1,176 @@ #include +#include +#include + +#if !TFLMC_USE_INTERPRETER_HOOKS #define private public +#endif #include "tensorflow/lite/micro/micro_interpreter.h" +#if !TFLMC_USE_INTERPRETER_HOOKS #undef private +#endif #include "CustomOperators.h" #include "RecordAllocations.h" +#include "MemMap.h" #include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/micro_error_reporter.h" static std::vector g_loggedAllocations; -static tflite::MicroAllocator *g_allocator; -static int g_currentNodeIndex = -1; +static std::vector g_nodeScratchBufferAllocations; +static size_t g_currentNodeIndex = -1; static uint8_t *g_arenaPtr = nullptr; static ptrdiff_t g_arena_size = 0; -static void* LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx, + +struct ScratchBufferInfo { + size_t node_id; + size_t bytes; +}; + +static std::map g_logged_scratch_buffers; + + + +#if TFLMC_USE_INTERPRETER_HOOKS + + +static tflite::MicroInterpreter::TfLiteContextHooks *g_tflm_hooks; + + +static void *LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx, size_t bytes) { + auto ptr = g_tflm_hooks->AllocatePersistentBuffer(ctx, bytes); + assert(ptr != nullptr && "Alloc failure"); + ptrdiff_t offset = (uint8_t *)ptr - g_arenaPtr; + + g_loggedAllocations.push_back( + {offset, bytes, + g_currentNodeIndex, tflmc::AllocKind::Persistent, 0}); + return ptr; +} + + +static TfLiteStatus LoggingRequestScratchBufferInArena(TfLiteContext *ctx, + size_t bytes, + int *buffer_idx) { + + auto res = g_tflm_hooks->RequestScratchBufferInArena(ctx, bytes, buffer_idx); + if (res == 
kTfLiteOk) { + g_logged_scratch_buffers[*buffer_idx] = {g_currentNodeIndex, bytes}; + } + return res; +} + + + +static void* LoggingGetScratchBuffer(struct TfLiteContext* ctx, int buffer_idx) { + return g_tflm_hooks->GetScratchBuffer (ctx, buffer_idx); +} + +static void LoggingNotifyNodeIndex(const struct TfLiteContext* context, + size_t node) { + g_currentNodeIndex = node; + return g_tflm_hooks->NotifyNodeIndex(context, node); +} + +static tflite::MicroInterpreter::TfLiteContextHooks g_recording_hooks = +{ + LoggingAllocatePersistentBuffer, + LoggingRequestScratchBufferInArena, + LoggingGetScratchBuffer, + LoggingNotifyNodeIndex + +}; + +void tflmc::SetRecordAllocationhooks( tflite::MicroInterpreter *interpreter, + uint8_t *arena_start, + size_t arena_size) { + g_tflm_hooks = interpreter->getHooks(); + g_arenaPtr = arena_start; + g_arena_size = arena_size; + interpreter->setHooks(&g_recording_hooks); +} + +void tflmc::RecordScratchBufferAllocations(tflite::MicroInterpreter *interpreter) +{ + auto ctx = interpreter->getTFLContext(); + for( auto &sb_i : g_logged_scratch_buffers ) + { + auto sb_idx = sb_i.first; + void *sb_start = g_tflm_hooks->GetScratchBuffer(ctx, sb_idx ); + assert(sb_start != nullptr && "Unknown Scratch Buffer"); + ptrdiff_t offset = (uint8_t *)sb_start - g_arenaPtr; + g_loggedAllocations.push_back( + {offset, sb_i.second.bytes, + sb_i.second.node_id, tflmc::AllocKind::Scratch, sb_i.first}); + + size_t node_id = sb_i.second.node_id; + if (g_nodeScratchBufferAllocations.size() <= node_id) { + g_nodeScratchBufferAllocations.resize(node_id+1, 0); + } + g_nodeScratchBufferAllocations[node_id] += 1; + } + +} + + + +TfLiteEvalTensor *tflmc::GetEvalTensor(tflite::MicroInterpreter *interpreter, int i) { + auto ctx = interpreter->getTFLContext(); + return ctx->GetEvalTensor(ctx, i); +} + +TfLiteTensor *tflmc::GetTensor(tflite::MicroInterpreter *interpreter, int i) { + auto ctx = interpreter->getTFLContext(); + return ctx->GetTensor(ctx, i); +} + +#else + +static tflite::MicroAllocator *g_allocator; +static void *LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx, size_t bytes) { - void* ptr = g_allocator->AllocatePersistentBuffer(bytes); - assert(ptr!=nullptr && "Alloc failure"); + auto ptr = g_allocator->AllocatePersistentBuffer(bytes); + assert(ptr != nullptr && "Alloc failure"); + ptrdiff_t offset = (uint8_t *)ptr - g_arenaPtr; + g_loggedAllocations.push_back( - {-(g_arenaPtr - (uint8_t *)ptr + g_arena_size), bytes, - g_currentNodeIndex}); + {offset, bytes, + g_currentNodeIndex, tflmc::AllocKind::Persistent, -1}); return ptr; } + static TfLiteStatus LoggingRequestScratchBufferInArena(TfLiteContext *ctx, size_t bytes, int *buffer_idx) { assert(false && "Not handling scratch buffers currently"); - return g_allocator->RequestScratchBufferInArena(g_currentNodeIndex, bytes, + return g_allocator->RequestScratchBufferInArena(bytes, buffer_idx); } -std::vector tflmc::RecordAllocations( - const tflite::Model *model, ptrdiff_t arena_size) { + + // HACK: here in essence, we create a duplicate interpreter here and re-execute + // Fragmnents of MicroInterpreter::AllocateTensors() with instrumented context + // API calls. 
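The hooks build therefore never has to reach into interpreter internals: it saves the original TfLiteContextHooks table, installs thin wrappers that record every call, and forwards to the saved entries. Reduced to its essentials (with a made-up one-entry hook table):

#include <cstddef>
#include <cstdio>
#include <vector>

struct Hooks {                                   // stand-in for the hook table
  void *(*alloc)(std::size_t bytes);
};

static void *RealAlloc(std::size_t bytes) { return ::operator new(bytes); }
static Hooks g_real_hooks = {RealAlloc};

static Hooks *g_forward = nullptr;               // saved original table
static std::vector<std::size_t> g_logged;

static void *LoggingAlloc(std::size_t bytes) {
  g_logged.push_back(bytes);                     // record, then forward
  return g_forward->alloc(bytes);
}
static Hooks g_logging_hooks = {LoggingAlloc};

int main() {
  Hooks *active = &g_real_hooks;
  g_forward = active;                            // remember where to forward
  active = &g_logging_hooks;                     // swap in the recording table

  void *p = active->alloc(64);
  std::printf("logged %zu call(s), first request %zu bytes\n",
              g_logged.size(), g_logged[0]);
  ::operator delete(p);
  return 0;
}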
+ +void tflmc::RecordAllocations( + const tflite::Model *model, + size_t arena_size, size_t arena_alignment) { + + tflmc::SufficientArena arena(arena_size, arena_alignment); + g_arenaPtr = arena.alginedBufferStart(); g_arena_size = arena_size; - std::vector arena_buf(g_arena_size); - g_arenaPtr = arena_buf.data(); tflite::MicroErrorReporter error_reporter; + + // Resolver must be passed in as otherwise pointers to its internal table + // in the arena will be invalidated.... + + tflite::AllOpsResolver resolver; tflmc::custom_operator_handle custom = tflmc::LoadCustom(&resolver); - tflite::MicroInterpreter interpreter(model, resolver, arena_buf.data(), + tflite::MicroInterpreter interpreter(model, resolver, g_arenaPtr, g_arena_size, &error_reporter); auto ctx = &interpreter.context_; @@ -49,14 +178,18 @@ std::vector tflmc::RecordAllocations( tflite::NodeAndRegistration *nodeAndRegs; TfLiteEvalTensor *eval_tensors=nullptr; + tflite::ScratchBufferHandle* scratchhandle=nullptr; + allocator->StartModelAllocation(model, resolver, &nodeAndRegs, &eval_tensors); - allocator->FinishModelAllocation(model, eval_tensors); + allocator->FinishModelAllocation(model, eval_tensors, &scratchhandle); g_allocator = allocator; ctx->AllocatePersistentBuffer = &LoggingAllocatePersistentBuffer; ctx->RequestScratchBufferInArena = nullptr; + auto ctx_GetScratchBuffer = ctx->GetScratchBuffer; ctx->GetScratchBuffer = nullptr; + auto subgraph = model->subgraphs()->Get(0); for (size_t i = 0; i < subgraph->operators()->size(); i++) { auto node = &nodeAndRegs[i].node; @@ -67,6 +200,7 @@ std::vector tflmc::RecordAllocations( } } + ctx->RequestScratchBufferInArena = &LoggingRequestScratchBufferInArena; for (size_t i = 0; i < subgraph->operators()->size(); i++) { @@ -76,11 +210,26 @@ std::vector tflmc::RecordAllocations( g_currentNodeIndex = i; reg->prepare(ctx, node); } + allocator->ResetTempAllocations(); } + + allocator->FinishModelAllocation(model, eval_tensors); + tflmc::UnloadCustom(custom); - return g_loggedAllocations; + for( auto &sb_i : g_logged_scratch_buffers ) + { + auto sb_idx = sb_i.first; + void *sb_start = ctx_GetScratchBuffer( ctx, sb_idx ); + assert(sb_start != nullptr && "Unknown Scratch Buffer"); + ptrdiff_t offset = (uint8_t *)sb_start - g_arenaPtr; + g_loggedAllocations.push_back( + {offset, sb_i.second.bytes, + sb_i.second.node_id, tflmc::AllocKind::Scratch, -1}); + } + } + TfLiteEvalTensor *tflmc::GetEvalTensor(tflite::MicroInterpreter *interpreter, int i) { auto ctx = &interpreter->context_; return ctx->GetEvalTensor(ctx, i); @@ -90,3 +239,9 @@ TfLiteTensor *tflmc::GetTensor(tflite::MicroInterpreter *interpreter, int i) { auto ctx = &interpreter->context_; return ctx->GetTensor(ctx, i); } + +#endif + +const std::vector &tflmc::RecordedAllocations() { + return g_loggedAllocations; +} diff --git a/src/RecordAllocations.h b/src/RecordAllocations.h index a8848ac..ece4ec2 100644 --- a/src/RecordAllocations.h +++ b/src/RecordAllocations.h @@ -6,13 +6,49 @@ namespace tflmc { +enum AllocKind : int { + Persistent, + Scratch +}; + struct Allocation { ptrdiff_t offset; size_t len; - int nodeIndex; + size_t nodeIndex; + AllocKind kind; + int buffer_index; }; -std::vector RecordAllocations(const tflite::Model *model, ptrdiff_t arena_size); + + + // We can try to use a stock kernel but + // this requires us to access private data and re-execute + // Fragmnents of MicroInterpreter::AllocateTensors() with instrumented context + // API calls. Painful to maintain and prone subtle Bugs. 
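With the allocations exposed through RecordedAllocations(), downstream code can derive arena statistics directly from the log, for example the high-water mark per allocation kind. An illustrative consumer, re-declaring the record shapes locally so it stands alone:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

enum AllocKind : int { Persistent, Scratch };

struct Allocation {
  std::ptrdiff_t offset;
  std::size_t len;
  std::size_t nodeIndex;
  AllocKind kind;
  int buffer_index;
};

// Highest end offset reached by any allocation of the given kind.
static std::ptrdiff_t HighWater(const std::vector<Allocation> &log, AllocKind kind) {
  std::ptrdiff_t max_end = 0;
  for (const auto &a : log) {
    if (a.kind == kind) {
      max_end = std::max(max_end, a.offset + static_cast<std::ptrdiff_t>(a.len));
    }
  }
  return max_end;
}

int main() {
  std::vector<Allocation> log = {
      {1024, 256, 0, Persistent, -1},
      {4096, 512, 1, Scratch, 0},
  };
  std::printf("persistent high water: %td, scratch high water: %td\n",
              HighWater(log, Persistent), HighWater(log, Scratch));
  return 0;
}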
Simpler to maintain a patch + // that adds hooks to MicroInterpreter to gather data from an + // actual MicroInterpreter::AllocateTensors() by intercepting the TfliteContext vectors + // which are a reasonably stable API. + +#if TFLMC_USE_INTERPRETER_HOOKS + +void SetRecordAllocationhooks(tflite::MicroInterpreter *interpreter, + uint8_t *arena_start, + size_t arena_size); + +void RecordScratchBufferAllocations(tflite::MicroInterpreter *interpreter); + + +#else +void RecordAllocations( + const tflite::Model *model, size_t arena_size, size_t arena_alignment); +#endif + +/** + * @brief Allocations from tensor arena with type and associated node /scratch buffer index. + * + * @return const std::vector& + */ +const std::vector &RecordedAllocations(); TfLiteEvalTensor *GetEvalTensor(tflite::MicroInterpreter *interpreter, int i); diff --git a/src/TypeToString.cc b/src/TypeToString.cc index 2747904..57a4f8c 100644 --- a/src/TypeToString.cc +++ b/src/TypeToString.cc @@ -67,7 +67,7 @@ std::string tflmc::to_string(TfLiteFusedActivation t) { switch (t) { NAME(kTfLiteActNone); NAME(kTfLiteActRelu); - NAME(kTfLiteActRelu1); + NAME(kTfLiteActReluN1To1); NAME(kTfLiteActRelu6); NAME(kTfLiteActTanh); NAME(kTfLiteActSignBit); diff --git a/src/main.cc b/src/main.cc index 89922f1..418ed7a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1,22 +1,85 @@ #include "CodeWriter.h" #include "Compiler.h" +#include "Options.h" -int main(int argc, char *argv[]) { - if (argc < 3 || argc > 4) { +#define LOG_ARGC_ARGV 1 + + +/** The "real" main - physical main has workarounds for various + * semi-hosting environments + */ + +int hosted_main(int argc, char *argv[]) { + + tflmc::Options &options = tflmc::Options::instance(); + int cur_arg = 1; + bool usage_error = false; + while (cur_arg < argc && !usage_error) { + + const std::string verbose_flag("--verbose"); + const std::string memory_flag("--mem_summary"); + if (verbose_flag == argv[cur_arg]) { + options.verbose = true; + } else if (memory_flag == argv[cur_arg]) { + if (cur_arg+1 < argc) { + options.memmap_json = argv[cur_arg+1]; + ++cur_arg; + } else { + usage_error = true; + } + } else if (argv[cur_arg][0] == '-') { + // No other "flag" + usage_error = true; + } else { + break; + } + ++cur_arg; + } + if (cur_arg+1 >= argc || cur_arg+4 < argc) { printf( - "Usage: %s modelFile.tflite outFile.cpp [NamingPrefix = \"model_\"]\n", + "Usage: %s [--verbose] [--mem_summary filename.json] modelFile.tflite outputSrcFile outputHdrFile [NamingPrefix (default: \"model_\")]\n", argv[0]); return 1; } std::string prefix = "model_"; - if (argc == 4) { - prefix = argv[3]; + if (cur_arg+3 < argc) { + prefix = argv[cur_arg+3]; } - if (!tflmc::CompileFile(argv[1], argv[2], prefix)) { + if (!tflmc::CompileFile(argv[cur_arg], argv[cur_arg+1], argv[cur_arg+2], prefix)) { return 1; } return 0; } + +#ifdef __ARMCOMPILER_VERSION +extern "C" int arm_sh_parse_cmdline( char ***p_argv); +extern "C" void arm_sh_exit(int code); +#endif + +int main(int argc, char *argv[]) { + +#ifdef __ARMCOMPILER_VERSION + // ARMClang runtime library has a very low (undocumented) maximum command line length + // for its internal argv parsing - exceeding it silently results in empty argv/argc. 
+ argc = arm_sh_parse_cmdline(&argv); +#endif +#if LOG_ARGC_ARGV + printf( "ARGC=%d ", argc); + for(int i=0; i < argc; ++i) { + printf(",%s", argv[i]); + } + printf("\n"); +#endif + int status = hosted_main(argc,argv); +#ifdef __ARMCOMPILER_VERSION + if (status) { + // ARMClang runtime library ignores exit status it always exits + // angel_SWIreason_ReportException with ADP_Stopped_ApplicationExit + arm_sh_exit(status); + } +#endif + return status; +} diff --git a/tflite_u_preint/static_data_utils.cc b/tflite_u_preint/static_data_utils.cc new file mode 100644 index 0000000..fa1d51e --- /dev/null +++ b/tflite_u_preint/static_data_utils.cc @@ -0,0 +1,99 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// PORTABLE OPTIMIZED + +// Support recording of selected kernel variant in prepare phase for static extraction for +// a fixed tflite model. + +// TF_LITE_MICRO_RECORD_OP_USER_DATA: +// When set the names of kernel variants eval functions recorded and can be dumped +// via PointerCollect API. +// TF_LITE_MICRO_USE_OFFLINE_OP_USER_DATA +// When set prepare phase kernel variant selection code is dropped with +// the eval functions recorded in tflite::micro::kernels::conv::eval_functions used instead. +// +// Benefits smaller binary, used unnecessary eval function variants are not lnked. 
+ + + + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" + +#include "tflite_u_preint/static_init_support.h" +#include "tensorflow/lite/micro/kernels/conv.h" + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + +namespace tflite { +namespace micro { + + + +CppPODStructInitializer TfLitePaddingValuesSubStruct(TfLitePaddingValues &pv) { + + auto init = new CppItems(); + *init + << pv.width + << pv.height + << pv.width_offset + << pv.height_offset; + + CppPODStructInitializer res(init); + return res; +} + + + +CppPODStructInitializer TfLiteOpDataConvSubStruct(OpDataConv &odc, size_t output_depth) { + + auto init = new CppItems(); + *init + << TfLitePaddingValuesSubStruct(odc.padding) + << odc.input_zero_point + << odc.filter_zero_point + << odc.output_zero_point + << odc.output_multiplier + << odc.output_shift + << tflite::micro::CppNamedVec("per_channel_output_multiplier", "int32_t", + odc.per_channel_output_multiplier, output_depth) + << tflite::micro::CppNamedVec("per_channel_output_shift", "int32_t", + odc.per_channel_output_shift, output_depth) + << odc.output_activation_min + << odc.output_activation_max; + CppPODStructInitializer res(init); + return res; +} + + +CppNamedStruct TfLiteCustomSub8BitPackingDetailsStructPtr(const char *name, const TfLiteCustomSub8BitPackingDetails &pv) { + + auto init = new CppItems(); + *init + << pv.bits_per_item + << pv.container_bits + << pv.packed_minor_dims + << pv.sparsity_coding + << "{}"; // Empty initializer + CppNamedStruct res(name, "const TfLiteCustomSub8BitPackingDetails", init); + return res; +} + + +} // namespace micro +} // namespace tflite + +#endif // TF_LITE_MICRO_RECORD_OP_USER_DATA diff --git a/tflite_u_preint/static_data_utils.h b/tflite_u_preint/static_data_utils.h new file mode 100644 index 0000000..3d99e81 --- /dev/null +++ b/tflite_u_preint/static_data_utils.h @@ -0,0 +1,41 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TFLMCOMPILER_STATIC_DATA_UTILS_H_ +#define TFLMCOMPILER_STATIC_DATA_UTILS_H_ + +#include "tflite_u_preint/static_init_support.h" + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +namespace tflite { + struct OpDataConv; + +namespace micro { + + + +CppPODStructInitializer TfLitePaddingValuesSubStruct(TfLitePaddingValues &pv); + +CppPODStructInitializer TfLiteOpDataConvSubStruct(OpDataConv &odc, size_t output_depth); + +CppNamedStruct TfLiteCustomSub8BitPackingDetailsStructPtr(const char *name, const TfLiteCustomSub8BitPackingDetails &pv); + + +} // namespace micro +} // namespace tflite + +#endif +#endif // #ifndef TFLMCOMPILER_STATIC_DATA_UTILS_H_ diff --git a/tflite_u_preint/static_init_support.cc b/tflite_u_preint/static_init_support.cc new file mode 100644 index 0000000..a03b2ab --- /dev/null +++ b/tflite_u_preint/static_init_support.cc @@ -0,0 +1,430 @@ +/* + * static_init_support.cc + * + * Created on: 10.08.2020 + * Author: stevensa + */ + +#include "tflite_u_preint/static_init_support.h" + +#if TF_LITE_MICRO_AUTO_DUMPED_OPDATA +#include "tensorflow/lite/micro/kernels/ifx_common/offline_prepare_utils.h" +#endif + +#include +#include +#include +#include +#include + + +namespace tflite { +namespace micro { + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + +// Vector: needs a named sub-initializer that has to be output first +CppItems &CppItems::operator<<(const char *literal) { + elements_.push_back( + std::unique_ptr(new CppLiteral(literal))); + return *this; +} + +CppItems &CppItems::operator<<(float value) { + elements_.push_back(std::unique_ptr( + new CppPrimitiveInitializer(value))); + return *this; +} + +CppItems &CppItems::operator<<(const CppNamedStruct &structref) { + elements_.push_back(std::unique_ptr( + new CppNamedStruct(structref))); + return *this; +} + +CppItems &CppItems::operator<<(const CppPODStructInitializer &substruct) { + elements_.push_back(std::unique_ptr( + new CppPODStructInitializer(substruct))); + return *this; +} + + +// TODO Fold into CppInitializerCollector + +class BaseCollector { + public: + BaseCollector() {} + + void recordLiteralForPointer(void *ptr, const std::string &identifier) { + pointer_literals_[ptr] = identifier; + } + + std::string getLiteralForPointer(void *ptr) { + std::string res; + auto lit_i = pointer_literals_.find(ptr); + if (lit_i != pointer_literals_.end()) { + res = lit_i->second; + } + return res; + } + + protected: + // LUT to find name for pointer (mainly intendded for function pointers) + std::map pointer_literals_; + std::string output_path_; +}; + +// +// singleton owning all all pointer collector implementations +// Used to implement auto-dump on exit without dependency +// on static object destruction ordering. +// + +class CppInitializerCollector : public BaseCollector { +protected: + CppInitializerCollector(); + public: + static CppInitializerCollector &instance(); + + void recordOpDataHeaders(const char *op_name, const char *headers, + const char *type); + + void recordStaticOpdata(const char *op_name, CppItems *op_data); + + void writeStaticOpDataHeaders(std::ostream &os); + + void writeStaticOpDataDefinitions(const std::string &prefix, std::ostream &os); + + size_t constDataSize() const; + + size_t initDataSize() const; + + size_t uninitDataSize() const; + + // Scratch buffer recording suuproted only for unit-testing static op data recording + // auto-dump. 
Post-compiler intercepts all Allocation requests itself + +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES + int recordScratchBuffer(ptrdiff_t offset_from_head); + + ptrdiff_t getRecordedScratchBufferStart(int globally_unique_buf_idx); + + void writeRecordedScratchBufferAllocations(std::ostream &os); + + void codegenRecordedOpdata() { + std::fstream myfile( + "gen/autodumped_src/static_eval_tables.cc", std::fstream::out | std::fstream::trunc); + myfile << "#include \"tensorflow/lite/c/common.h\"\n" + "#include \"tensorflow/lite/c/builtin_op_data.h\"\n" + "#include \"tflite_u_preint/static_init_support.h\"\n" + "\n"; + writeStaticOpDataHeaders(myfile); + myfile << "\n"; + writeStaticOpDataDefinitions("autorecord_", myfile); + myfile << "\n"; + + // Needed for unit-tests as KernelRunner (etc) don't inject recording + // of buffer Allocation + writeRecordedScratchBufferAllocations(myfile); + myfile.close(); + } + + static void autoDumpOpDataTables() { + instance().codegenRecordedOpdata(); + } + + ~CppInitializerCollector() { + } + +#endif + + std::map op_headers_; + + // Map associating operator supporting static initializatino data + // with required headers (identified via node pointer) + // with recorded C++ static initialization data + std::map> + per_inst_user_data_; + + +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES + /** + * @brief Allocated scratch buffer starts in tensor arena + * + */ + std::vector scratch_buf_allocations_; +#endif + /** + * @brief Recorded per-instance op user_data sequence + * + * Per-op user data in order of op invocation (identified by op-type and + * instance in model execution order) + */ + + struct OpInstUserData { + std::string op_name; //!< Op type name + size_t user_data_idx; //!< Instance of op type in model + }; + + std::vector op_user_data_; + +}; + + +CppInitializerCollector::CppInitializerCollector() + { + } + + +CppInitializerCollector &CppInitializerCollector::instance() { + + /* We manually created a object destructed on exit as not all our + embedded/semi-hosted environments seem to support C++ static object + destruction on exit */ + static CppInitializerCollector *inst = nullptr; + if( inst != nullptr) { + return *inst; + } + inst = new CppInitializerCollector; + + // For autodump based testing we generate C++ source with + // the captured op user_data and buffer memory allocations... + // on exit... +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES + atexit(CppInitializerCollector::autoDumpOpDataTables); +#endif + return *inst; +} + +void CppInitializerCollector::recordOpDataHeaders(const char *op_name, + const char *headers, + const char *op_data_type) { + std::string key(op_name); + auto &headers_for_op = op_headers_[key]; + assert(headers_for_op.empty()); + headers_for_op = std::string(headers); + + // Create a Named struct vector record to hold the per-instance op user_data + // for instances of this operator type. 
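instance() leaks the collector and registers the dump via atexit() on purpose: several of the semi-hosted targets never run static destructors, while atexit handlers still fire. The pattern in isolation, with a hypothetical Collector:

#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

class Collector {
 public:
  static Collector &instance() {
    // Leaked on purpose: atexit still fires on the supported targets even when
    // static destructors do not, so the dump happens exactly once at exit.
    static Collector *inst = nullptr;
    if (inst == nullptr) {
      inst = new Collector;
      std::atexit(&Collector::dumpAtExit);
    }
    return *inst;
  }
  void record(const std::string &line) { lines_.push_back(line); }

 private:
  static void dumpAtExit() {
    for (const auto &l : instance().lines_) std::puts(l.c_str());
  }
  std::vector<std::string> lines_;
};

int main() {
  Collector::instance().record("conv2d: OpDataConv #0");
  Collector::instance().record("fully_connected: OpDataFullyConnected #0");
  return 0;  // dumpAtExit() runs after main returns
}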
+  auto &op_user_data = per_inst_user_data_[key];
+  op_user_data.reset(
+      new CppNamedStructVecInitializer("op_user_data", op_data_type));
+}
+
+void CppInitializerCollector::recordStaticOpdata(const char *op_name,
+                                                 CppItems *op_data) {
+  std::string key(op_name);
+  auto &inst_user_data = per_inst_user_data_[key];
+  size_t inst_idx = inst_user_data->getSize();
+  auto pod_init = new CppPODStructInitializer(op_data);
+  inst_user_data->pushBackElt(pod_init);
+
+  // Record a reference to the op data to hand to this op instance during execution
+  OpInstUserData user_data_ref = {op_name, inst_idx};
+  op_user_data_.push_back(user_data_ref);
+}
+
+void CppInitializerCollector::writeStaticOpDataHeaders(std::ostream &os) {
+  for (auto &hdr_i : op_headers_) {
+    os << hdr_i.second;
+    os << "\n";
+  }
+}
+
+void CppInitializerCollector::writeStaticOpDataDefinitions(const std::string &prefix, std::ostream &os) {
+  os << "namespace tflite {\n"
+        "namespace ops {\n"
+        "namespace micro {\n\n";
+  // Op user_data tables (one per op type supporting offline pre-computed user_data)
+  for (auto &id_i : per_inst_user_data_) {
+    os << "namespace " << id_i.first << " {\n\n";
+    id_i.second->cppDefinition(os, prefix);
+    os << "} // namespace " << id_i.first << "\n\n";
+  }
+
+  os << "} // namespace micro\n"
+        "} // namespace ops\n\n"
+
+        "namespace micro {\n"
+        "namespace " << prefix << "model {\n";
+
+  // Table of op user_data in op invocation order
+  os << "void *precomputed_op_user_data[] = {\n";
+  for (auto &ud_ref_i : op_user_data_) {
+    os << "   &tflite::ops::micro::" << ud_ref_i.op_name << "::" << prefix << "op_user_data[" << ud_ref_i.user_data_idx << "],\n";
+  }
+  os << "};\n\n";
+
+  os << "} // namespace " << prefix << "model\n";
+
+  os << "} // namespace micro\n";
+  os << "} // namespace tflite\n";
+}
+
+size_t CppInitializerCollector::initDataSize() const {
+
+  // Currently, due to non-const-cleanness in tflite(u), we generate ALL OpData
+  // as initialized non-const data. Hence it consumes its value size in ROM AND RAM.
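  // Illustrative figures only: three recorded instances whose streamed OpData is
  // 48 bytes each would make the loop below report 3 * 48 = 144 bytes, budgeted
  // once in ROM (the initializer image) and again in RAM (the mutable copy).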
+ size_t usage = 0; + for (auto &id_i : per_inst_user_data_) { + usage += id_i.second->value_size(); + } + return usage; +} + + +size_t CppInitializerCollector::uninitDataSize() const { + return 0; +} + +size_t CppInitializerCollector::constDataSize() const { + return 0; +} + +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES + +int CppInitializerCollector::recordScratchBuffer(ptrdiff_t offset_from_head) { + int globally_unique_buf_idx = static_cast(scratch_buf_allocations_.size()); + scratch_buf_allocations_.push_back(offset_from_head); + return globally_unique_buf_idx; +} + + +ptrdiff_t CppInitializerCollector::getRecordedScratchBufferStart(int globally_unique_buf_idx) { + if (globally_unique_buf_idx < 0 || static_cast(globally_unique_buf_idx) >= scratch_buf_allocations_.size()) { + return static_cast(0xdeadbeef); + } else { + return scratch_buf_allocations_[globally_unique_buf_idx]; + } +} + + +void CppInitializerCollector::writeRecordedScratchBufferAllocations(std::ostream &os) +{ + os << "namespace tflite {\n" + << "namespace micro {\n\n"; + + if (scratch_buf_allocations_.size() == 0) { + os << +R"( +ptrdiff_t getRecordedScratchBufferStart(int buf_idx) { + return 0xdeadbeef; +} +)"; + } else { + os << +R"( +ptrdiff_t scratch_buffer_allocations[] = { +)"; + size_t offsets = 0; + for (auto o : scratch_buf_allocations_) { + os << std::to_string(o) << ","; + ++offsets; + if (offsets % 10 == 0) { + os << "\n"; + } else { + os << " "; + } + } + os << +R"( +}; + +ptrdiff_t getRecordedScratchBufferStart(int globally_unique_buf_idx) { + const int num_sbuf_allocs = static_cast(sizeof(scratch_buffer_allocations) / sizeof(ptrdiff_t)); + if (globally_unique_buf_idx < 0 || globally_unique_buf_idx >= num_sbuf_allocs) { + return 0xdeadbeef; + } else { + return scratch_buffer_allocations[globally_unique_buf_idx]; + } +} + +)"; + } + os << "} // namespace micro\n" + "} // namespace tflite\n"; +} +#endif + +void CppPointerLiteral::cppInitializer(std::ostream &os, + const std::string &id_prefix) { + auto literal = CppInitializerCollector::instance().getLiteralForPointer(ptr_); + assert(!literal.empty()); + os << literal; +} + +// +// Primary entry point for tflite(u) post-compiler... 
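// Illustrative sketch of what writeStaticOpDataDefinitions() above emits for a
// model with two recorded instances of a "conv" op and prefix "autorecord_"
// (the op and user_data type names are assumed for the example):
//
//   namespace tflite { namespace ops { namespace micro {
//   namespace conv {
//   OpDataConv autorecord_op_user_data[] = {
//     { /* fields streamed for instance 0 */ },
//     { /* fields streamed for instance 1 */ },
//   };
//   }  // namespace conv
//   } } }  // namespace micro, ops, tflite
//
//   namespace tflite { namespace micro { namespace autorecord_model {
//   void *precomputed_op_user_data[] = {
//     &tflite::ops::micro::conv::autorecord_op_user_data[0],
//     &tflite::ops::micro::conv::autorecord_op_user_data[1],
//   };
//   } } }  // namespace autorecord_model, micro, tflite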
+// + +void writeStaticOpDataHeaders(std::ostream &os) { + CppInitializerCollector::instance().writeStaticOpDataHeaders(os); +} + +void writeStaticOpDataDefinitions(const std::string &prefix, std::ostream &os) { + CppInitializerCollector::instance().writeStaticOpDataDefinitions(prefix, os); +} + +void recordStaticOpdata(const char *op_name, CppItems *op_data) { + CppInitializerCollector::instance().recordStaticOpdata(op_name, op_data); +} + +void recordLiteralForPointer(const std::string &literal, void *ptr) { + CppInitializerCollector::instance().recordLiteralForPointer(ptr, literal); +} + +size_t initDataUsage() { + return CppInitializerCollector::instance().initDataSize(); +} + +size_t uninitDataUsage() { + return CppInitializerCollector::instance().uninitDataSize(); +} + +size_t constDataUsage() { + return CppInitializerCollector::instance().constDataSize(); +} + + +DefineStaticOpDataHeaders::DefineStaticOpDataHeaders( + const char *op_name, const char *headers, const char *user_data_type) { + CppInitializerCollector::instance().recordOpDataHeaders(op_name, headers, + user_data_type); +} + +#endif + +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES +int recordScratchBuffer(ptrdiff_t offset_from_head) { + return CppInitializerCollector::instance().recordScratchBuffer(offset_from_head); +} +#endif + + +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES +ptrdiff_t getRecordedScratchBufferStart(int globally_unique_buf_idx) { + return CppInitializerCollector::instance().getRecordedScratchBufferStart(globally_unique_buf_idx); +} +#endif + + +#if TF_LITE_MICRO_AUTO_DUMPED_OPDATA + +// Provided by autorecord-ed generated op user_data code.... + +namespace autorecord_model { +extern void *precomputed_op_user_data[]; +} // namespace autorecord_mdoel + +void selectAutoDumpedOfflineOpUserData() { + resetOfflineOpUserData(autorecord_model::precomputed_op_user_data); +} +#endif + + +} // namespace micro +} // namespace tflite diff --git a/tflite_u_preint/static_init_support.h b/tflite_u_preint/static_init_support.h new file mode 100644 index 0000000..cc5a825 --- /dev/null +++ b/tflite_u_preint/static_init_support.h @@ -0,0 +1,528 @@ +/* + * static_init_support.h + * + * Created on: 10.08.2020 + * Author: stevensa + */ + +#ifndef TFLMCOMPILER_STATIC_INIT_SUPPORT_H_ +#define TFLMCOMPILER_STATIC_INIT_SUPPORT_H_ + + + + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/op_macros.h" + +#include +#include +#include +#include +#include +#include +#include + + +namespace tflite { +namespace micro { + +#if TF_LITE_MICRO_RECORD_OP_USER_DATA + +class BaseCollector; + +class CppNamedStruct; +class CppPODStructInitializer; + +struct CppInitializerBase { + virtual void cppInitializer(std::ostream &os, + const std::string &id_prefix) = 0; + virtual void cppDefinition(std::ostream &os, + const std::string &id_prefix) = 0; + + /** + * @brief Memory need to hold value of an item to initialize POD struct member + * + * + * @return size_t + */ + virtual size_t value_size() const { return 0; }; + + /** + * @brief Memory need to reference the value of the item as a struct menber + * + * For items located inline this will be 0, for named itsemf the size of + * a pointer / ref to the item. 
+ * + * @return size_t + */ + virtual size_t ref_size() const { return 0; }; + + /** + * @brief Alignment constraint for value / ref POD struct member initializer + * @return size_t + */ + virtual size_t align() const { return 1; }; + + + /** + * @brief Compute aount of padding need to achieve alignment constraint + * + * @param prev_item_end End of previous item in assumed address space. + * @param required_alignment Required alignment in assumed address space + * @return size_t Padding required to achive alignment + */ + inline static size_t alignment_padding(size_t prev_item_end, size_t required_alignment) { + size_t misalign = prev_item_end%required_alignment; + return misalign != 0 ? required_alignment-misalign : 0; + } + + + virtual ~CppInitializerBase() {} + +}; + +template +class CppPrimitiveInitializer : public CppInitializerBase { + public: + CppPrimitiveInitializer(const T val) : val_(val) {} + + void cppDefinition(std::ostream &os, const std::string &id_prefix) {} + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << std::to_string(val_); + } + + size_t value_size() const { + return sizeof(T); + } + + size_t align() const { + return alignof(T); + } + + protected: + T val_; +}; + +class CppNamedItemBase : virtual public CppInitializerBase { + protected: + CppNamedItemBase() {} + + public: + CppNamedItemBase(const char *id) : id_(id) {} + + const char *getId() const { return id_; } + + protected: + const char *id_; +}; + +class CppInitializerReference : public CppNamedItemBase { + public: + CppInitializerReference(const char *id) : CppNamedItemBase(id) {} + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << id_prefix << id_; + } + + // A little dirty but fortunately exotica with varying pointer sizes + // not our worry... + size_t ref_size() const { return sizeof(int &); } + size_t align() const { return alignof(int &); } +}; + + +class CppInitializerPointer : public CppNamedItemBase { + public: + CppInitializerPointer(const char *id) : CppNamedItemBase(id) {} + + void cppDefinition(std::ostream &os, const std::string &id_prefix) {} + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << "&" << id_prefix << id_; + } + + // A little dirty but fortunately exotica with varying pointer sizes + // not our worry... + size_t ref_size() const { return sizeof(int *); } + size_t align() const { return alignof(int *); } + +}; + + +class CppLiteral : public CppInitializerBase { + public: + CppLiteral(const char *literal) : literal_(literal) {} + + CppLiteral(const std::string &literal) : literal_(literal) {} + + CppLiteral(std::string &&literal) + : literal_(std::forward(literal)) {} + + void cppDefinition(std::ostream &os, const std::string &id_prefix) {} + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << literal_; + } + + size_t value_size() const { return sizeof(int *); } + size_t align() const { return sizeof(int *); } + + protected: + std::string literal_; +}; + + +class CppPointerLiteral : public CppInitializerBase { + public: + CppPointerLiteral(void *ptr) : ptr_(ptr) {} + + + void cppDefinition(std::ostream &os, const std::string &id_prefix) {} + + void cppInitializer(std::ostream &os, const std::string &id_prefix); + + // A little dirty but fortunately exotica with varying pointer sizes + // not our worry... 
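  // Illustrative sketch (the function name MyOpEval is assumed): a raw pointer
  // can only be serialized if a symbolic literal was registered for it first,
  // e.g.
  //
  //   tflite::micro::recordLiteralForPointer(
  //       "tflite::ops::micro::MyOpEval", reinterpret_cast<void *>(MyOpEval));
  //   ...
  //   *items << reinterpret_cast<void *>(MyOpEval);
  //
  // cppInitializer() then looks the pointer up and prints the registered
  // literal instead of a meaningless numeric address.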
+ size_t value_size() const { return sizeof(void *); } + size_t align() const { return alignof(void *); } + + protected: + void *ptr_; +}; + + +class CppDefinitionBase : public CppNamedItemBase { + public: + CppDefinitionBase(const char *id, const char *type) + : CppNamedItemBase(id), type_(type) {} + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << id_prefix << id_; + } + const char *getType() const { return type_; } + + protected: + const char *type_; +}; + + +template +class CppNamedVec : public CppDefinitionBase { + public: + CppNamedVec(const char *id, const char *type, const T *data, size_t len) + : CppDefinitionBase(id, type) + , null_(data == nullptr) { + if (!null_) { + for (size_t i = 0; i < len; ++i) { + data_.push_back(data[i]); + } + } + } + + void cppDefinition(std::ostream &os, const std::string &id_prefix) { + if (null_) { + os << "constexpr " << type_ << " *" << id_prefix << id_ << " = nullptr;\n"; + } else { + os << type_ << " " << id_prefix << id_ << "[] = {\n"; + for (size_t i = 0; i < data_.size(); ++i) { + os << data_[i] << ", "; + } + os << "\n};\n"; + } + } + + + size_t value_size() const { return sizeof(T)*data_.size(); } + size_t ref_size() const { return sizeof(T *); } + size_t align() const { return alignof(T *); } + protected: + // We have copy data as (de)allocation before serialization is possible + std::vector data_; + bool null_; +}; + + +class CppItems { + public: + CppItems() {} + + template + typename std::enable_if::value, CppItems &>::type + operator<<(T value) { + elements_.push_back(std::unique_ptr( + new CppPrimitiveInitializer(value))); + return *this; + } + + // For pointer to array: needs a named sub-initializer that has to be output first + template + CppItems &operator<<(const CppNamedVec &subvec); + + CppItems &operator<<(const char *literal); + + CppItems &operator<<(float fvalue); + + + template + typename std::enable_if::value, + CppItems &>::type + operator<<(T value); + + // Pointer to structure: needs a named sub-initializer that has to be output first + CppItems &operator<<(const CppNamedStruct &structref); + + // For sub-strucuture: an + CppItems &operator<<(const CppPODStructInitializer &substruct); + + typedef std::deque> named_subinits_t; + typedef std::vector> elements_t; + + + const elements_t &elements() const { return elements_; } + + size_t value_size() const { + size_t init_size = 0; + size_t values_size = 0; + for( auto &e : elements_) { + auto e_align = e->align(); + auto padding = CppInitializerBase::alignment_padding(init_size, e_align); + init_size += padding + e->ref_size(); + values_size += e->value_size(); + } + // TODO: really we should allow for padding between values too! 
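    // Worked example of alignment_padding() above: if the previous item ends at
    // offset 6 and the next element needs 4-byte alignment, misalign = 6 % 4 = 2,
    // so 2 bytes of padding are inserted and the element starts at offset 8; an
    // already aligned end (e.g. 8 with alignment 4) needs no padding.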
+ return init_size+values_size; + } + + size_t align() const { + if (elements_.empty()) { + return 1; + } else { + return elements_[0]->align(); + } + } + +protected: + elements_t elements_; + +}; // namespace micro + + +class CppPODStructInitializer : public CppInitializerBase { + public: + CppPODStructInitializer(CppItems *cppitems) + : cppitems_(cppitems) + { + } + + + void cppDefinition(std::ostream &os, const std::string &id_prefix) { + for (auto &si : cppitems_->elements()) { + si->cppDefinition(os, id_prefix); + } + } + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << "{"; + auto &elts = cppitems_->elements(); + for (size_t i = 0; i < elts.size(); ++i) { + if (i > 0) { + os << ", "; + } + elts[i]->cppInitializer(os, id_prefix); + } + os << "}"; + } + + size_t value_size() const { + return cppitems_->value_size(); + } + + size_t align() const { + return cppitems_->align(); + } + +protected: + + std::shared_ptr cppitems_; + +}; // namespace micro + + /** + * @todo really, this should be named CppPtrToNamedStruct + */ +class CppNamedStruct : public CppDefinitionBase { + public: + CppNamedStruct(const char *id, const char *type, CppItems *cppitems) + : CppDefinitionBase(id, type) + , cppitems_(cppitems) + {} + + + void cppInitializer(std::ostream &os, const std::string &id_prefix) { + os << "&" << id_prefix << id_; + } + + void cppDefinition(std::ostream &os, const std::string &id_prefix) { + std::string sub_prefix = id_prefix + id_ + "_"; + cppitems_.cppDefinition(os, sub_prefix); + os << type_ << " " << id_prefix << id_ << " = \n"; + cppitems_.cppInitializer(os, sub_prefix); + os << ";\n"; + } + + size_t ref_size() const { + return sizeof(int *); + } + + size_t value_size() const { + return cppitems_.value_size(); + } + + size_t align() const { + return alignof(int *); + } + + +protected: + CppPODStructInitializer cppitems_; +}; + + +class CppNamedStructVecInitializer : public CppDefinitionBase { + public: + CppNamedStructVecInitializer(const char *id, const char *type) + : CppDefinitionBase(id, type) {} + + + void cppDefinition(std::ostream &os, const std::string &id_prefix) { + for (size_t i = 0; i < elts_.size(); ++i) { + std::string sub_prefix = id_prefix + id_ + std::to_string(i) + "_"; + elts_[i]->cppDefinition(os, sub_prefix); + } + os << getType() << " " << id_prefix << id_ << "[] = {\n"; + for (size_t i = 0; i < elts_.size(); ++i) { + os << " "; + std::string sub_prefix = id_prefix + id_ + std::to_string(i) + "_"; + elts_[i]->cppInitializer(os, sub_prefix); + if (i < elts_.size()-1) { + os << ", "; + } + os << "\n"; + } + os << "};\n"; + } + + void pushBackElt(CppPODStructInitializer *elt) { + elts_.push_back(std::unique_ptr(elt)); + } + + + size_t getSize() const { return elts_.size(); } + + size_t ref_size() const { + return sizeof(int *); + } + + size_t value_size() const { + if(elts_.empty()) { + return 0; + } else { + auto value_size = elts_[0]->value_size(); + auto alignment = elts_[0]->align(); + auto padding = alignment_padding(value_size, alignment); + auto aligned_size = value_size+padding; + return aligned_size*elts_.size(); + } + } + + size_t align() const { + return alignof(int *); + } + + protected: + std::vector> elts_; + +}; + +// +// Implementation of CppItems stream ops +// + +template +CppItems &CppItems::operator<<(const CppNamedVec &subvec) { + elements_.push_back(std::unique_ptr( + new CppNamedVec (subvec)) + ); + return *this; +} + + +template +typename std::enable_if::value, + CppItems &>::type +CppItems::operator<<(T 
value) { + elements_.push_back(std::unique_ptr( + new CppPointerLiteral(reinterpret_cast(value)))); + return *this; +} + + +// +// Primary entry-points for tflite(u) post-compiler... +// + +void writeStaticOpDataHeaders(std::ostream &os); + +void writeStaticOpDataDefinitions(const std::string &prefix, std::ostream &os); + +void recordStaticOpdata(const char *op_name, CppItems *op_data); + +void recordLiteralForPointer(const std::string &literal, void *ptr); + +size_t initDataUsage(); + +size_t uninitDataUsage(); + +size_t constDataUsage(); + +class DefineStaticOpDataHeaders { + public: + DefineStaticOpDataHeaders(const char *op_name, const char *headers, + const char *user_data_type); +}; +#endif + +#if TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES +/** + * @brief Record new scratch buffers offset in tensor arena + * + * @param offset_from_head Tensor arena offset + * @return int Globally unique index to identify this scratch buffer + */ + +int recordScratchBuffer(ptrdiff_t offset_from_head); +#endif + +#if TF_LITE_MICRO_AUTO_DUMPED_OPDATA || TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES +/** + * @brief Get offset in tensor arena of start of specified (allocated) scratch buffer + * + * @param globally_unique_buf_idx (Globally unique buffer index from @c recordScratchBuffer) + * @return ptrdiff_t Scratch buffer start as offset into tensor arena. + * + */ + +ptrdiff_t getRecordedScratchBufferStart(int globally_unique_buf_idx); +#endif + +#if TF_LITE_MICRO_AUTO_DUMPED_OPDATA + void selectAutoDumpedOfflineOpUserData(); +#endif + +} // namespace micro +} // namespace tflite + + + +#endif /* TFLMCOMPILER_STATIC_INIT_SUPPORT_H_ */ From 18c063a14d903cc824f9ad3a18649f83bada3b4d Mon Sep 17 00:00:00 2001 From: "Philipp v. K" Date: Mon, 13 Jun 2022 11:57:15 +0200 Subject: [PATCH 02/14] Fix cmake standalone build of tflite-micro lib --- CMakeLists.txt | 1 + cmake/FindTFLite.cmake | 87 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b93a33..c5f9021 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,6 +74,7 @@ TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC ${TF_LIB}) TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC TF_LITE_STATIC_MEMORY + TFLMC_USE_INTERPRETER_HOOKS TF_LITE_DISABLE_X86_NEON SUFFICIENT_ARENA_SIZE=128*1024*1024 ) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index c8389bc..f8fc3d2 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -87,15 +87,31 @@ ENDIF() SET(TFL_SRC ${TF_SRC}/tensorflow/lite) SET(TFLM_SRC ${TFL_SRC}/micro) SET(TFLD_SRC ${TFL_SRC}/tools/make/downloads) +SET(TFLMD_SRC ${TFLM_SRC}/tools/make/downloads) IF(EXISTS ${TFLD_SRC}/flatbuffers/include) LIST(APPEND TFL_INC_DIRS ${TFLD_SRC}/flatbuffers/include) +ELSEIF(EXISTS ${TFLMD_SRC}/flatbuffers/include) + LIST(APPEND TFL_INC_DIRS ${TFLMD_SRC}/flatbuffers/include) ENDIF() IF(EXISTS ${TFLD_SRC}/gemmlowp) - LIST(APPEND ${TFLD_SRC}/gemmlowp) + LIST(APPEND TFL_INC_DIRS ${TFLD_SRC}/gemmlowp) +ELSEIF(EXISTS ${TFLMD_SRC}/gemmlowp) + LIST(APPEND TFL_INC_DIRS ${TFLMD_SRC}/gemmlowp) ENDIF() +IF(EXISTS ${TFLD_SRC}/ruy) + LIST(APPEND TFL_INC_DIRS ${TFLD_SRC}/ruy) +ELSEIF(EXISTS ${TFLMD_SRC}/ruy) + LIST(APPEND TFL_INC_DIRS ${TFLMD_SRC}/ruy) +ENDIF() + +# SET(CUSTOM_QUANT_SRC ${TFL_SRC}/experimental/custom_quantization_util.cc) +# IF(EXISTS ${CUSTOM_QUANT_SRC}) +# SET(TFL_OPT_SRCS ${CUSTOM_QUANT_SRC}) +# ENDIF() + LIST(APPEND TFL_INC_DIRS ${TF_SRC} ) @@ -103,19 +119,55 @@ LIST(APPEND TFL_INC_DIRS FILE(GLOB TFL_ROOT_SRCS 
${TFLM_SRC}/*.cc ) +# schema_utils.cc only exists for newer TF versions +IF(EXISTS ${TFL_SRC}/schema/schema_utils.cc) + LIST(APPEND TFL_ROOT_SRCS ${TFL_SRC}/schema/schema_utils.cc) +ENDIF() FILE(GLOB TFL_KERNELS_SRCS ${TFLM_SRC}/kernels/*.cc ${TFL_SRC}/kernels/internal/quantization_util.cc ${TFL_SRC}/kernels/kernel_util.cc + ${TFLM_SRC}/kernels/kernel_util.cc ) +# These ones carry an unwanted dependecy (TODO: Fix) +FILE(GLOB TFL_KERNELS_TO_REMOVE + # ${TFLM_SRC}/kernels/depth_to_space.cc + # ${TFLM_SRC}/kernels/space_to_depth.cc + # ${TFLM_SRC}/kernels/gather.cc + # ${TFLM_SRC}/kernels/transpose.cc + # ${TFLM_SRC}/kernels/floor_mod.cc + # ${TFLM_SRC}/kernels/floor_div.cc + ) +FOREACH(src ${TFL_KERNELS_TO_REMOVE}) + LIST(FIND TFL_KERNELS_SRCS ${src} TFL_KERNELS_SRCS_FOUND_INDEX) + IF(${TFL_KERNELS_SRCS_FOUND_INDEX} GREATER_EQUAL 0) + LIST(REMOVE_ITEM TFL_KERNELS_SRCS ${src}) + ENDIF() +ENDFOREACH() + +IF(TFLM_EXTRA_KERNELS) + FILE(GLOB TFL_EXTRA_KERNEL_SRCS + ${TFLM_SRC}/kernels/${TFLM_EXTRA_KERNELS}/*.cc + ) + FOREACH(src ${TFL_EXTRA_KERNEL_SRCS}) + GET_FILENAME_COMPONENT(src_name ${src} NAME) + SET(src_path "${TFLM_SRC}/kernels/${src_name}") + LIST(FIND TFL_KERNELS_SRCS ${src_path} TFL_KERNELS_SRCS_FOUND_INDEX) + IF(${TFL_KERNELS_SRCS_FOUND_INDEX} GREATER_EQUAL 0) + MESSAGE(STATUS "Replacing TFLM version of ${src_name} by ${TFLM_EXTRA_KERNELS} variant...") + LIST(REMOVE_ITEM TFL_KERNELS_SRCS ${src_path}) + ENDIF() + ENDFOREACH() +ENDIF() + FILE(GLOB TFL_CORE_API_SRCS ${TFL_SRC}/core/api/*.cc ) FILE(GLOB TFL_C_SRCS - ${TFL_SRC}/c/common.c + ${TFL_SRC}/c/common.cc ) FILE(GLOB TFL_MEM_PLANNER_SRCS @@ -125,30 +177,61 @@ FILE(GLOB TFL_MEM_PLANNER_SRCS SET(TFL_SRCS ${TFL_ROOT_SRCS} ${TFL_KERNELS_SRCS} + ${TFL_EXTRA_KERNEL_SRCS} ${TFL_CORE_API_SRCS} ${TFL_C_SRCS} ${TFL_MEM_PLANNER_SRCS} + ${TFL_OPT_SRCS} ) +MESSAGE(STATUS "TFL_SRCS=${TFL_SRCS}") + LIST(FILTER TFL_SRCS EXCLUDE REGEX "([a-z0-9_]+_test.cc)$") +IF(RECORD_STATIC_KERNELS) + LIST(APPEND TFL_INC_DIRS ${TFLITE_STATIC_INIT_PATH}) + LIST(APPEND TFL_SRCS + ${TFLITE_STATIC_INIT_PATH}/static_data_utils.cc + ${TFLITE_STATIC_INIT_PATH}/static_init_support.cc + ) +ENDIF() + ADD_LIBRARY(${LIB_NAME} STATIC ${TFL_SRCS} ) TARGET_INCLUDE_DIRECTORIES(${LIB_NAME} PUBLIC ${TFL_INC_DIRS} + /home/philipp/src/tflmc/CMSIS_5/ + /home/philipp/src/tflmc/CMSIS_5/CMSIS/Core/Include/ + /home/philipp/src/tflmc/CMSIS_5/CMSIS/NN/Include/ + /home/philipp/src/tflmc/CMSIS_5/CMSIS/DSP/Include/ ) +TARGET_LINK_LIBRARIES(${LIB_NAME} PUBLIC /home/philipp/src/tflmc/CMSIS_5/CMSIS/NN/build/Source/libcmsis-nn.a) + TARGET_COMPILE_DEFINITIONS(${LIB_NAME} PUBLIC TF_LITE_USE_GLOBAL_MAX TF_LITE_USE_GLOBAL_MIN TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS TF_LITE_STATIC_MEMORY TFLITE_EMULATE_FLOAT + CMSIS_NN "$<$:TF_LITE_STRIP_ERROR_STRINGS>" ) +TARGET_COMPILE_DEFINITIONS(${LIB_NAME} PUBLIC + PREINTERPRETER +) + +IF(RECORD_STATIC_KERNELS) + TARGET_COMPILE_DEFINITIONS(${LIB_NAME} PUBLIC + TF_LITE_MICRO_RECORD_STATIC_KERNEL_VARIANT + TF_LITE_MICRO_AUTO_DUMP_POINTER_TABLES + STATIC_INIT_OUT_FILE="${TF_SRC}/tensorflow/lite/micro/kernels/recorded_model/static_eval_tables.cc" + ) +ENDIF() + SET(TFLite_INCLUDE_DIRS ${TFL_INC_DIRS} ) From e3f16d2f1f2712bb04b8c4d0df804c03f8a38e4e Mon Sep 17 00:00:00 2001 From: "Philipp v. 
K" Date: Wed, 22 Jun 2022 17:31:22 +0200 Subject: [PATCH 03/14] CMake: link arena_allocator sources --- cmake/FindTFLite.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index f8fc3d2..5991c4e 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -174,6 +174,10 @@ FILE(GLOB TFL_MEM_PLANNER_SRCS ${TFLM_SRC}/memory_planner/*.cc ) +FILE(GLOB TFL_ARENA_ALLOCATOR_SRCS + ${TFLM_SRC}/arena_allocator/*.cc + ) + SET(TFL_SRCS ${TFL_ROOT_SRCS} ${TFL_KERNELS_SRCS} @@ -181,6 +185,7 @@ SET(TFL_SRCS ${TFL_CORE_API_SRCS} ${TFL_C_SRCS} ${TFL_MEM_PLANNER_SRCS} + ${TFL_ARENA_ALLOCATOR_SRCS} ${TFL_OPT_SRCS} ) From b2d6b36ceab2000f0a1c3e0725a52001ce783482 Mon Sep 17 00:00:00 2001 From: "Philipp v. K" Date: Wed, 22 Jun 2022 17:52:14 +0200 Subject: [PATCH 04/14] CMake: use standalone tflite repository --- cmake/FindTFLite.cmake | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index 5991c4e..09a3079 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -12,7 +12,7 @@ IF(NOT TF_SRC) MESSAGE(STATUS "Getting TF tag '${TF_TAG}' and not master") FetchContent_Declare( tf - GIT_REPOSITORY https://github.com/tensorflow/tensorflow.git + GIT_REPOSITORY https://github.com/tensorflow/tflite-micro.git GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT GIT_TAG ${TF_TAG} @@ -22,7 +22,7 @@ IF(NOT TF_SRC) MESSAGE(STATUS "Getting TF commit '${TF_COMMIT}' and not master") FetchContent_Declare( tf - GIT_REPOSITORY https://github.com/tensorflow/tensorflow.git + GIT_REPOSITORY https://github.com/tensorflow/tflite-micro.git GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT GIT_TAG ${TF_COMMIT} @@ -31,9 +31,10 @@ IF(NOT TF_SRC) ELSE() FetchContent_Declare( tf - GIT_REPOSITORY https://github.com/tensorflow/tensorflow.git + GIT_REPOSITORY https://github.com/tensorflow/tflite-micro.git GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT + GIT_TAG main QUIET ) ENDIF() From ac12e0acc517f9816f28c6e904d3c8181f103b15 Mon Sep 17 00:00:00 2001 From: "Philipp v. K" Date: Wed, 22 Jun 2022 17:57:08 +0200 Subject: [PATCH 05/14] CMake: remove unused code --- cmake/FindTFLite.cmake | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index 09a3079..e9e3fe0 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -132,22 +132,6 @@ FILE(GLOB TFL_KERNELS_SRCS ${TFLM_SRC}/kernels/kernel_util.cc ) -# These ones carry an unwanted dependecy (TODO: Fix) -FILE(GLOB TFL_KERNELS_TO_REMOVE - # ${TFLM_SRC}/kernels/depth_to_space.cc - # ${TFLM_SRC}/kernels/space_to_depth.cc - # ${TFLM_SRC}/kernels/gather.cc - # ${TFLM_SRC}/kernels/transpose.cc - # ${TFLM_SRC}/kernels/floor_mod.cc - # ${TFLM_SRC}/kernels/floor_div.cc - ) -FOREACH(src ${TFL_KERNELS_TO_REMOVE}) - LIST(FIND TFL_KERNELS_SRCS ${src} TFL_KERNELS_SRCS_FOUND_INDEX) - IF(${TFL_KERNELS_SRCS_FOUND_INDEX} GREATER_EQUAL 0) - LIST(REMOVE_ITEM TFL_KERNELS_SRCS ${src}) - ENDIF() -ENDFOREACH() - IF(TFLM_EXTRA_KERNELS) FILE(GLOB TFL_EXTRA_KERNEL_SRCS ${TFLM_SRC}/kernels/${TFLM_EXTRA_KERNELS}/*.cc @@ -190,7 +174,6 @@ SET(TFL_SRCS ${TFL_OPT_SRCS} ) -MESSAGE(STATUS "TFL_SRCS=${TFL_SRCS}") LIST(FILTER TFL_SRCS EXCLUDE REGEX "([a-z0-9_]+_test.cc)$") From 727adae28c59a5eb83fb331414a3b5189160a367 Mon Sep 17 00:00:00 2001 From: "Philipp v. 
K" Date: Wed, 22 Jun 2022 17:53:35 +0200 Subject: [PATCH 06/14] CMake: remove custom quant code --- cmake/FindTFLite.cmake | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index e9e3fe0..7a24eac 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -108,11 +108,6 @@ ELSEIF(EXISTS ${TFLMD_SRC}/ruy) LIST(APPEND TFL_INC_DIRS ${TFLMD_SRC}/ruy) ENDIF() -# SET(CUSTOM_QUANT_SRC ${TFL_SRC}/experimental/custom_quantization_util.cc) -# IF(EXISTS ${CUSTOM_QUANT_SRC}) -# SET(TFL_OPT_SRCS ${CUSTOM_QUANT_SRC}) -# ENDIF() - LIST(APPEND TFL_INC_DIRS ${TF_SRC} ) From a33b847435f7530480916f5ba047494a0ba1df65 Mon Sep 17 00:00:00 2001 From: "Philipp v. K" Date: Wed, 22 Jun 2022 17:57:17 +0200 Subject: [PATCH 07/14] CMake: do not hardcode cmsisnn --- cmake/FindTFLite.cmake | 51 +++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index 7a24eac..c979b00 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -90,6 +90,23 @@ SET(TFLM_SRC ${TFL_SRC}/micro) SET(TFLD_SRC ${TFL_SRC}/tools/make/downloads) SET(TFLMD_SRC ${TFLM_SRC}/tools/make/downloads) + +IF(TFLM_OPTIMIZED_KERNEL) + # Suboptimal but we do not want to hardcode every kernel which should be replaced... + FILE(GLOB TFLM_EXTRA_KERNEL_SRCS ${TFLM_SRC}/kernels/${TFLM_OPTIMIZED_KERNEL}/*.cc) + # LIST(APPEND TFLM_EXTRA_KERNEL_INCS ${TFLM_SRC}/kernels/${TFLM_OPTIMIZED_KERNEL}/) + STRING(TOUPPER "${TFLM_OPTIMIZED_KERNEL}" TFLM_OPTIMIZED_KERNEL_UPPER) +ENDIF() + +IF(TFLM_OPTIMIZED_KERNEL_LIB) + LIST(APPEND TFLM_EXTRA_KERNEL_LIBS ${TFLM_OPTIMIZED_KERNEL_LIB}) +ENDIF() + +IF(TFLM_OPTIMIZED_KERNEL_INCLUDE_DIR) + LIST(APPEND TFLM_EXTRA_KERNEL_INCS ${TFLM_OPTIMIZED_KERNEL_INCLUDE_DIR}) +ENDIF() + + IF(EXISTS ${TFLD_SRC}/flatbuffers/include) LIST(APPEND TFL_INC_DIRS ${TFLD_SRC}/flatbuffers/include) ELSEIF(EXISTS ${TFLMD_SRC}/flatbuffers/include) @@ -127,20 +144,19 @@ FILE(GLOB TFL_KERNELS_SRCS ${TFLM_SRC}/kernels/kernel_util.cc ) -IF(TFLM_EXTRA_KERNELS) - FILE(GLOB TFL_EXTRA_KERNEL_SRCS - ${TFLM_SRC}/kernels/${TFLM_EXTRA_KERNELS}/*.cc - ) - FOREACH(src ${TFL_EXTRA_KERNEL_SRCS}) - GET_FILENAME_COMPONENT(src_name ${src} NAME) +FOREACH(src ${TFLM_EXTRA_KERNEL_SRCS}) + GET_FILENAME_COMPONENT(src_name ${src} NAME) + IF(${src_name} MATCHES ".*_test.*") + LIST(REMOVE_ITEM TFLM_EXTRA_KERNEL_SRCS ${src}) + ELSE() SET(src_path "${TFLM_SRC}/kernels/${src_name}") - LIST(FIND TFL_KERNELS_SRCS ${src_path} TFL_KERNELS_SRCS_FOUND_INDEX) - IF(${TFL_KERNELS_SRCS_FOUND_INDEX} GREATER_EQUAL 0) - MESSAGE(STATUS "Replacing TFLM version of ${src_name} by ${TFLM_EXTRA_KERNELS} variant...") - LIST(REMOVE_ITEM TFL_KERNELS_SRCS ${src_path}) + LIST(FIND TFLM_REFERENCE_KERNEL_SRCS ${src_path} TFLM_KERNEL_SRCS_FOUND_INDEX) + IF(${TFLM_KERNEL_SRCS_FOUND_INDEX} GREATER_EQUAL 0) + MESSAGE(STATUS "Replacing TFLM version of ${src_name} by optimized variant...") + LIST(REMOVE_ITEM TFLM_REFERENCE_KERNEL_SRCS ${src_path}) ENDIF() - ENDFOREACH() -ENDIF() + ENDIF() +ENDFOREACH() FILE(GLOB TFL_CORE_API_SRCS ${TFL_SRC}/core/api/*.cc @@ -186,13 +202,12 @@ ADD_LIBRARY(${LIB_NAME} STATIC TARGET_INCLUDE_DIRECTORIES(${LIB_NAME} PUBLIC ${TFL_INC_DIRS} - /home/philipp/src/tflmc/CMSIS_5/ - /home/philipp/src/tflmc/CMSIS_5/CMSIS/Core/Include/ - /home/philipp/src/tflmc/CMSIS_5/CMSIS/NN/Include/ - /home/philipp/src/tflmc/CMSIS_5/CMSIS/DSP/Include/ + ${TFLM_EXTRA_KERNEL_INCS} ) -TARGET_LINK_LIBRARIES(${LIB_NAME} PUBLIC 
/home/philipp/src/tflmc/CMSIS_5/CMSIS/NN/build/Source/libcmsis-nn.a) +IF(TFLM_EXTRA_KERNEL_LIBS) + TARGET_LINK_LIBRARIES(${LIB_NAME} PUBLIC ${TFLM_EXTRA_KERNEL_LIBS}) +ENDIF() TARGET_COMPILE_DEFINITIONS(${LIB_NAME} PUBLIC TF_LITE_USE_GLOBAL_MAX @@ -200,7 +215,7 @@ TARGET_COMPILE_DEFINITIONS(${LIB_NAME} PUBLIC TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS TF_LITE_STATIC_MEMORY TFLITE_EMULATE_FLOAT - CMSIS_NN + ${TFLM_OPTIMIZED_KERNEL_UPPER} "$<$:TF_LITE_STRIP_ERROR_STRINGS>" ) From 30664f54a470fc593585e5097dfc73c87f4cd2ad Mon Sep 17 00:00:00 2001 From: "Philipp v. K" Date: Wed, 22 Jun 2022 18:00:13 +0200 Subject: [PATCH 08/14] CMake: make tf repo url customizable --- cmake/FindTFLite.cmake | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index c979b00..51b0f19 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -7,12 +7,18 @@ IF(TF_RECACHE) ENDIF() IF(NOT TF_SRC) + + IF(TF_URL) + SET(TF_REPO ${TF_URL}) + ELSE() + SET(TF_REPO https://github.com/tensorflow/tflite-micro.git) + ENDIF() INCLUDE(FetchContent) IF(TF_TAG) MESSAGE(STATUS "Getting TF tag '${TF_TAG}' and not master") FetchContent_Declare( tf - GIT_REPOSITORY https://github.com/tensorflow/tflite-micro.git + GIT_REPOSITORY ${TF_REPO} GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT GIT_TAG ${TF_TAG} @@ -22,7 +28,7 @@ IF(NOT TF_SRC) MESSAGE(STATUS "Getting TF commit '${TF_COMMIT}' and not master") FetchContent_Declare( tf - GIT_REPOSITORY https://github.com/tensorflow/tflite-micro.git + GIT_REPOSITORY ${TF_REPO} GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT GIT_TAG ${TF_COMMIT} @@ -31,7 +37,7 @@ IF(NOT TF_SRC) ELSE() FetchContent_Declare( tf - GIT_REPOSITORY https://github.com/tensorflow/tflite-micro.git + GIT_REPOSITORY ${TF_REPO} GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT GIT_TAG main From 38d0eff11e8e2d426f66ae25d128a3c0f569ff2c Mon Sep 17 00:00:00 2001 From: "Philipp v. 
K" Date: Wed, 29 Jun 2022 10:44:43 +0200 Subject: [PATCH 09/14] CMake: use fixed flatbuffers commit --- cmake/FindTFLite.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index 51b0f19..3751cb4 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -53,8 +53,9 @@ IF(NOT TF_SRC) FetchContent_Declare( flatbuffers - GIT_REPOSITORY https://github.com/google/flatbuffers.git + GIT_REPOSITORY https://github.com/google/flatbuffers.git GIT_PROGRESS FALSE + GIT_TAG f28c2b29364970e23c8ba3d751ca188f8a08c71e QUIET ) FetchContent_GetProperties(flatbuffers) From 69c3f6c7f01682b09688ac2f83778cab479ac233 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Wed, 29 Jun 2022 12:34:00 +0200 Subject: [PATCH 10/14] Cmake: fixes --- cmake/FindTFLite.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index 3751cb4..4c6e8cd 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -144,13 +144,19 @@ IF(EXISTS ${TFL_SRC}/schema/schema_utils.cc) LIST(APPEND TFL_ROOT_SRCS ${TFL_SRC}/schema/schema_utils.cc) ENDIF() -FILE(GLOB TFL_KERNELS_SRCS +FILE(GLOB TFLM_REFERENCE_KERNEL_SRCS ${TFLM_SRC}/kernels/*.cc ${TFL_SRC}/kernels/internal/quantization_util.cc ${TFL_SRC}/kernels/kernel_util.cc ${TFLM_SRC}/kernels/kernel_util.cc ) + +# Remove broken kernel +IF(EXISTS ${TFLM_SRC}/kernels/unidirectional_sequence_lstm.cc) + LIST(REMOVE_ITEM TFLM_REFERENCE_KERNEL_SRCS ${TFLM_SRC}/kernels/unidirectional_sequence_lstm.cc) +ENDIF() + FOREACH(src ${TFLM_EXTRA_KERNEL_SRCS}) GET_FILENAME_COMPONENT(src_name ${src} NAME) IF(${src_name} MATCHES ".*_test.*") @@ -183,7 +189,7 @@ FILE(GLOB TFL_ARENA_ALLOCATOR_SRCS SET(TFL_SRCS ${TFL_ROOT_SRCS} - ${TFL_KERNELS_SRCS} + ${TFLM_REFERENCE_KERNEL_SRCS} ${TFL_EXTRA_KERNEL_SRCS} ${TFL_CORE_API_SRCS} ${TFL_C_SRCS} From c97a127c3e03bb14698e7eb4619e70fc3df063e6 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Wed, 17 Aug 2022 12:02:15 +0200 Subject: [PATCH 11/14] CMake: lint --- cmake/FindTFLite.cmake | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cmake/FindTFLite.cmake b/cmake/FindTFLite.cmake index 4c6e8cd..31a345b 100644 --- a/cmake/FindTFLite.cmake +++ b/cmake/FindTFLite.cmake @@ -17,7 +17,7 @@ IF(NOT TF_SRC) IF(TF_TAG) MESSAGE(STATUS "Getting TF tag '${TF_TAG}' and not master") FetchContent_Declare( - tf + tf GIT_REPOSITORY ${TF_REPO} GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT @@ -36,7 +36,7 @@ IF(NOT TF_SRC) ) ELSE() FetchContent_Declare( - tf + tf GIT_REPOSITORY ${TF_REPO} GIT_PROGRESS FALSE GIT_REMOTE_UPDATE_STRATEGY REBASE_CHECKOUT @@ -52,9 +52,9 @@ IF(NOT TF_SRC) SET(TF_SRC ${tf_SOURCE_DIR}) FetchContent_Declare( - flatbuffers + flatbuffers GIT_REPOSITORY https://github.com/google/flatbuffers.git - GIT_PROGRESS FALSE + GIT_PROGRESS FALSE GIT_TAG f28c2b29364970e23c8ba3d751ca188f8a08c71e QUIET ) @@ -66,10 +66,10 @@ IF(NOT TF_SRC) LIST(APPEND TFL_INC_DIRS ${flatbuffers_SOURCE_DIR}/include) FetchContent_Declare( - fixedpoint - GIT_REPOSITORY https://github.com/google/gemmlowp.git - GIT_PROGRESS FALSE - QUIET + fixedpoint + GIT_REPOSITORY https://github.com/google/gemmlowp.git + GIT_PROGRESS FALSE + QUIET ) FetchContent_GetProperties(fixedpoint) IF(NOT fixedpoint_POPULATED) @@ -79,10 +79,10 @@ IF(NOT TF_SRC) LIST(APPEND TFL_INC_DIRS ${fixedpoint_SOURCE_DIR}) FetchContent_Declare( - ruy - GIT_REPOSITORY https://github.com/google/ruy.git 
- GIT_PROGRESS FALSE - QUIET + ruy + GIT_REPOSITORY https://github.com/google/ruy.git + GIT_PROGRESS FALSE + QUIET ) FetchContent_GetProperties(ruy) IF(NOT ruy_POPULATED) @@ -132,12 +132,12 @@ ELSEIF(EXISTS ${TFLMD_SRC}/ruy) LIST(APPEND TFL_INC_DIRS ${TFLMD_SRC}/ruy) ENDIF() -LIST(APPEND TFL_INC_DIRS +LIST(APPEND TFL_INC_DIRS ${TF_SRC} ) FILE(GLOB TFL_ROOT_SRCS - ${TFLM_SRC}/*.cc + ${TFLM_SRC}/*.cc ) # schema_utils.cc only exists for newer TF versions IF(EXISTS ${TFL_SRC}/schema/schema_utils.cc) @@ -145,8 +145,8 @@ IF(EXISTS ${TFL_SRC}/schema/schema_utils.cc) ENDIF() FILE(GLOB TFLM_REFERENCE_KERNEL_SRCS - ${TFLM_SRC}/kernels/*.cc - ${TFL_SRC}/kernels/internal/quantization_util.cc + ${TFLM_SRC}/kernels/*.cc + ${TFL_SRC}/kernels/internal/quantization_util.cc ${TFL_SRC}/kernels/kernel_util.cc ${TFLM_SRC}/kernels/kernel_util.cc ) @@ -172,7 +172,7 @@ FOREACH(src ${TFLM_EXTRA_KERNEL_SRCS}) ENDFOREACH() FILE(GLOB TFL_CORE_API_SRCS - ${TFL_SRC}/core/api/*.cc + ${TFL_SRC}/core/api/*.cc ) FILE(GLOB TFL_C_SRCS @@ -187,10 +187,10 @@ FILE(GLOB TFL_ARENA_ALLOCATOR_SRCS ${TFLM_SRC}/arena_allocator/*.cc ) -SET(TFL_SRCS +SET(TFL_SRCS ${TFL_ROOT_SRCS} ${TFLM_REFERENCE_KERNEL_SRCS} - ${TFL_EXTRA_KERNEL_SRCS} + ${TFLM_EXTRA_KERNEL_SRCS} ${TFL_CORE_API_SRCS} ${TFL_C_SRCS} ${TFL_MEM_PLANNER_SRCS} @@ -244,11 +244,11 @@ IF(RECORD_STATIC_KERNELS) ) ENDIF() -SET(TFLite_INCLUDE_DIRS +SET(TFLite_INCLUDE_DIRS ${TFL_INC_DIRS} ) -SET(TFLite_SOURCES +SET(TFLite_SOURCES ${TFL_SRCS} ) From a8d28531c04377012acd6ce34485a26f0e62bbd1 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Mon, 27 Jun 2022 14:44:07 +0200 Subject: [PATCH 12/14] include micro_mutable_op_resolver.h to get all required header files for the kernels --- src/Compiler.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Compiler.cc b/src/Compiler.cc index 82a957a..b23d2fc 100644 --- a/src/Compiler.cc +++ b/src/Compiler.cc @@ -1100,6 +1100,7 @@ void tflmc::Compiler::writeSource(std::ostream &out) { #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" +#include "tensorflow/lite/micro/micro_mutable_op_resolver.h" #include "tensorflow/lite/micro/compatibility.h" #include "tensorflow/lite/micro/micro_context.h" #if LOG_OP_INPUTS From 8bd78e1d5c1e674035828ac3c0968fc3f0a96514 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Wed, 29 Jun 2022 12:34:22 +0200 Subject: [PATCH 13/14] add missing SUPPORT_CUSTOM_QUANT --- src/Compiler.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Compiler.cc b/src/Compiler.cc index b23d2fc..8e3bf8a 100644 --- a/src/Compiler.cc +++ b/src/Compiler.cc @@ -912,11 +912,15 @@ void tflmc::Compiler::writeInitSource(CodeWriter &wr) { TfLiteAffineQuantization const* quant = ((TfLiteAffineQuantization const*)(tensorData[i].quantization.params)); tflTensors[i].params.scale = quant->scale->data[0]; tflTensors[i].params.zero_point = quant->zero_point->data[0]; - } else if (tflTensors[i].quantization.type == kTfLitePackedAffineQuantization) { +)"; +#if SUPPORT_CUSTOM_QUANT + wr << R"( } else if (tflTensors[i].quantization.type == kTfLitePackedAffineQuantization) { TfLitePackedAffineQuantization const* quant = (TfLitePackedAffineQuantization const*)(tensorData[i].quantization.params); tflTensors[i].params.scale = quant->affine.scale->data[0]; tflTensors[i].params.zero_point = quant->affine.zero_point->data[0]; - } +)"; +#endif // SUPPORT_CUSTOM_QUANT + wr << R"( } )"; } else { wr << " tflTensors[i].quantization.type = 
kTfLiteNoQuantization;\n"; From 5a3c29e2ec8e35d43d1e6d7945ad7b4457abafa4 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Wed, 29 Jun 2022 12:35:10 +0200 Subject: [PATCH 14/14] turn off LOG_ARGC_ARGV --- src/main.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main.cc b/src/main.cc index 418ed7a..cfa7317 100644 --- a/src/main.cc +++ b/src/main.cc @@ -2,7 +2,9 @@ #include "Compiler.h" #include "Options.h" -#define LOG_ARGC_ARGV 1 +#ifndef LOG_ARGC_ARGV +#define LOG_ARGC_ARGV 0 +#endif // LOG_ARGC_ARGV /** The "real" main - physical main has workarounds for various
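// Usage note (sketch): since LOG_ARGC_ARGV is now wrapped in #ifndef, the
// verbose argc/argv dump can be re-enabled without editing main.cc by defining
// the macro on the compiler command line, e.g. -DLOG_ARGC_ARGV=1.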