diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index d193eed..87b7e4c 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -8,7 +8,6 @@ jobs:
       matrix:
         config:
           - {name: "Linux", os: ubuntu-latest, cmake-generator: ""}
-          - {name: "Windows", os: windows-latest, cmake-generator: "-G \"MinGW Makefiles\""}
     runs-on: ${{ matrix.config.os }}
     name: ${{ matrix.config.name }}
 
@@ -17,10 +16,15 @@ jobs:
     - name: Requirements
       shell: bash
       run: |
+        pip install pillow
         cd ..
-        git clone https://github.com/tensorflow/tensorflow.git
+        git clone https://github.com/tensorflow/tflite-micro.git tensorflow
         cd tensorflow
-        make -f tensorflow/lite/micro/tools/make/Makefile hello_world
+        git checkout f474248365ad48654ba8a27ac5bf49a6afbb80e7
+        git apply ../tflite_micro_compiler/patches/tflite-micro.patch
+        git apply ../tflite_micro_compiler/patches/tflite-micro-makefile.patch
+        make -Bf tensorflow/lite/micro/tools/make/Makefile NO_INTERPRETER=TRUE microlite -j2
+        make -Bf tensorflow/lite/micro/tools/make/Makefile microlite -j2
     - name: Requirements (Windows)
       if: matrix.config.os == 'windows-latest'
       run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aec255e..5f72a87 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,9 +21,9 @@ IF(NOT GET_TF_SRC)
             ${TFLMD_SRC}/ruy
         )
         IF(WIN32)
-            SET(TF_LIB ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib/libtensorflow-microlite.a)
+            SET(TF_LIB ${TFLM_SRC}/tools/make/gen/windows_x86_64_default/lib/libtensorflow-microlite.a)
         ELSE()
-            SET(TF_LIB ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib/libtensorflow-microlite.a)
+            SET(TF_LIB ${TFLM_SRC}/tools/make/gen/linux_x86_64_default/lib/libtensorflow-microlite.a)
         ENDIF()
     ELSE()
         MESSAGE(FATAL_ERROR "\
@@ -45,7 +45,6 @@ SET(COMPILER_HEADERS
     ${PROJECT_SOURCE_DIR}/src/Compiler.h
     ${PROJECT_SOURCE_DIR}/src/CustomOperators.h
     ${PROJECT_SOURCE_DIR}/src/MemMap.h
-    ${PROJECT_SOURCE_DIR}/src/RecordAllocations.h
     ${PROJECT_SOURCE_DIR}/src/TypeToString.h
     )
 
@@ -54,7 +53,6 @@ SET(COMPILER_SRCS
     ${PROJECT_SOURCE_DIR}/src/Compiler.cc
     ${PROJECT_SOURCE_DIR}/src/CustomOperators.cc
     ${PROJECT_SOURCE_DIR}/src/MemMap.cc
-    ${PROJECT_SOURCE_DIR}/src/RecordAllocations.cc
     ${PROJECT_SOURCE_DIR}/src/TypeToString.cc
     ${PROJECT_SOURCE_DIR}/src/main.cc
     )
diff --git a/examples/generic_test/CMakeLists.txt b/examples/generic_test/CMakeLists.txt
index 254e7d2..3159cbb 100644
--- a/examples/generic_test/CMakeLists.txt
+++ b/examples/generic_test/CMakeLists.txt
@@ -19,13 +19,18 @@ TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC
 )
 
 IF(WIN32)
-    TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64/lib)
+    TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/windows_x86_64_default/lib)
 ELSE()
-    TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64/lib)
+    TARGET_LINK_DIRECTORIES(${PROJECT_NAME} PUBLIC ${TFLM_SRC}/tools/make/gen/linux_x86_64_default/lib)
 ENDIF()
-TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC tensorflow-microlite)
+TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC tensorflow-microlite-no-interpreter)
 
 TARGET_COMPILE_DEFINITIONS(${PROJECT_NAME} PUBLIC
     TF_LITE_STATIC_MEMORY
     TF_LITE_DISABLE_X86_NEON
+    NO_INTERPRETER
+)
+
+TARGET_COMPILE_FEATURES(${PROJECT_NAME} PUBLIC
+    cxx_std_14
 )
diff --git a/patches/tflite-micro-makefile.patch b/patches/tflite-micro-makefile.patch
new file mode 100644
index 0000000..1d6fcc2
--- /dev/null
+++ b/patches/tflite-micro-makefile.patch
@@ -0,0 +1,57 @@
+diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
+index e25b3bc..09ce481 100644
+--- a/tensorflow/lite/micro/tools/make/Makefile
++++ b/tensorflow/lite/micro/tools/make/Makefile
+@@ -150,6 +150,10 @@ COMMON_FLAGS := \
+   $(CC_WARNINGS) \
+   $(ADDITIONAL_DEFINES)
+ 
++ifeq ($(NO_INTERPRETER),TRUE)
++  COMMON_FLAGS += -DNO_INTERPRETER
++endif
++
+ ifeq ($(TARGET), $(HOST_OS))
+   # If we are not doing a cross-compilation then -DTF_LITE_USE_CTIME is what we
+   # want to have by default.
+@@ -233,7 +237,11 @@ endif
+ 
+ # This library is the main target for this makefile. It will contain a minimal
+ # runtime that can be linked in to other programs.
+-MICROLITE_LIB_NAME := libtensorflow-microlite.a
++ifeq ($(NO_INTERPRETER),TRUE)
++  MICROLITE_LIB_NAME := libtensorflow-microlite-no-interpreter.a
++else
++  MICROLITE_LIB_NAME := libtensorflow-microlite.a
++endif
+ 
+ # Where compiled objects are stored.
+ GENDIR := $(MAKEFILE_DIR)/gen/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE)/
+@@ -469,6 +477,28 @@ THIRD_PARTY_KERNEL_CC_SRCS :=
+ # Load custom kernels.
+ include $(MAKEFILE_DIR)/additional_kernels.inc
+ 
++ifeq ($(NO_INTERPRETER),TRUE)
++  MICRO_LITE_NO_INTERPRETER_SRCS := \
++  tensorflow/lite/micro/fake_micro_context.cc \
++  tensorflow/lite/micro/micro_context.cc \
++  tensorflow/lite/micro/micro_allocator.cc \
++  tensorflow/lite/micro/micro_graph.cc \
++  tensorflow/lite/micro/micro_interpreter.cc
++
++  MICRO_LITE_UNSUPPORTED_KERNELS_SRCS := \
++  tensorflow/lite/micro/kernels/assign_variable.cc \
++  tensorflow/lite/micro/kernels/call_once.cc \
++  tensorflow/lite/micro/kernels/if.cc \
++  tensorflow/lite/micro/kernels/kernel_runner.cc \
++  tensorflow/lite/micro/kernels/read_variable.cc \
++  tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc \
++  tensorflow/lite/micro/kernels/while.cc \
++  tensorflow/lite/micro/kernels/var_handle.cc
++
++  MICROLITE_CC_KERNEL_SRCS := $(filter-out $(MICRO_LITE_UNSUPPORTED_KERNELS_SRCS), $(MICROLITE_CC_KERNEL_SRCS))
++  MICROLITE_CC_BASE_SRCS := $(filter-out $(MICRO_LITE_NO_INTERPRETER_SRCS), $(MICROLITE_CC_BASE_SRCS))
++endif
++
+ MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SRCS))
+ MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_BENCHMARK_SRCS), $(MICROLITE_CC_SRCS))
+ 
diff --git a/patches/tflite-micro.patch b/patches/tflite-micro.patch
new file mode 100644
index 0000000..0058678
--- /dev/null
+++ b/patches/tflite-micro.patch
@@ -0,0 +1,52 @@
+diff --git a/tensorflow/lite/micro/micro_context.h b/tensorflow/lite/micro/micro_context.h
+index e7be654..2693ff2 100644
+--- a/tensorflow/lite/micro/micro_context.h
++++ b/tensorflow/lite/micro/micro_context.h
+@@ -20,6 +20,22 @@ limitations under the License.
+ #include "tensorflow/lite/micro/micro_allocator.h"
+ #include "tensorflow/lite/micro/micro_graph.h"
+ 
++#ifdef NO_INTERPRETER
++
++namespace tflite {
++  struct MicroContext{
++      TfLiteTensor* (*AllocateTempInputTensor)(const TfLiteNode* node, int index);
++      TfLiteTensor* (*AllocateTempOutputTensor)(const TfLiteNode* node, int index);
++      void (*DeallocateTempTfLiteTensor)(TfLiteTensor* tensor);
++      void* (*external_context)();
++  };
++  static inline MicroContext* GetMicroContext(const struct TfLiteContext* context){
++      return reinterpret_cast<MicroContext*>(context->impl_);
++  }
++}
++
++#else
++
+ namespace tflite {
+ // MicroContext is eventually going to become the API between TFLM and the
+ // kernels, replacing all the functions in TfLiteContext. The end state is code
+@@ -158,4 +174,6 @@ void MicroContextReportOpError(struct TfLiteContext* context,
+ 
+ }  // namespace tflite
+ 
++#endif  // NO_INTERPRETER
++
+ #endif  // TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
+diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h
+index ae7fc8f..5c2fd6e 100644
+--- a/tensorflow/lite/micro/micro_interpreter.h
++++ b/tensorflow/lite/micro/micro_interpreter.h
+@@ -140,6 +140,13 @@ class MicroInterpreter {
+   // arena_used_bytes() + 16.
+   size_t arena_used_bytes() const { return allocator_.used_bytes(); }
+ 
++  size_t operators_size() const { return model_->subgraphs()->Get(0)->operators()->size(); }
++
++  // For debugging only.
++  const NodeAndRegistration node_and_registration(int node_index)  {
++    return graph_.GetAllocations()[0].node_and_registrations[node_index];
++  }
++
+  protected:
+   const MicroAllocator& allocator() const { return allocator_; }
+   const TfLiteContext& context() const { return context_; }
diff --git a/src/CodeWriter.cc b/src/CodeWriter.cc
index 564c0b1..1a84665 100644
--- a/src/CodeWriter.cc
+++ b/src/CodeWriter.cc
@@ -114,11 +114,25 @@ void tflmc::CodeWriter::writeBuiltin(tflite::BuiltinOperator op,
       TfLiteAddParams const* p = (TfLiteAddParams const*)data;
       out_ << to_string(p->activation) << " };";
     } break;
+    case tflite::BuiltinOperator_MEAN: {
+      out_ << "TfLiteReducerParams " << name << " = { ";
+      TfLiteReducerParams const* p = (TfLiteReducerParams const*)data;
+      out_ << p->keep_dims << " };";
+    } break;
     case tflite::BuiltinOperator_MUL: {
       out_ << "TfLiteMulParams " << name << " = { ";
       TfLiteMulParams const* p = (TfLiteMulParams const*)data;
       out_ << to_string(p->activation) << " };";
     } break;
+    case tflite::BuiltinOperator_PACK: {
+      out_ << "TfLitePackParams " << name << " = { ";
+      TfLitePackParams const* p = (TfLitePackParams const*)data;
+      out_ << p->values_count << ", " << p->axis << " };";
+    } break;
+    case tflite::BuiltinOperator_SHAPE: {
+      out_ << "TfLiteShapeParams " << name << " = { "
+           << " };";
+    } break;
     case tflite::BuiltinOperator_SUB: {
       out_ << "TfLiteSubParams " << name << " = { ";
       TfLiteSubParams const* p = (TfLiteSubParams const*)data;
@@ -130,6 +144,19 @@ void tflmc::CodeWriter::writeBuiltin(tflite::BuiltinOperator op,
           (TfLiteConcatenationParams const*)data;
       out_ << p->axis << ", " << to_string(p->activation) << " };";
     } break;
+    case tflite::BuiltinOperator_STRIDED_SLICE: {
+      out_ << "TfLiteStridedSliceParams " << name << " = { ";
+      TfLiteStridedSliceParams const* p = (TfLiteStridedSliceParams const*)data;
+      out_ << p->begin_mask << ", " << p->end_mask << ", " << p->ellipsis_mask
+           << ", " << p->new_axis_mask << ", " << p->shrink_axis_mask << " };";
+    } break;
+    case tflite::BuiltinOperator_TRANSPOSE_CONV: {
+      out_ << "TfLiteTransposeConvParams " << name << " = { ";
+      TfLiteTransposeConvParams const* p =
+          (TfLiteTransposeConvParams const*)data;
+      out_ << to_string(p->padding) << ", " << p->stride_width << ", "
+           << p->stride_height << " };";
+    } break;
     default: {
       size_t datalen = GetBuiltinDataSize(op, subgraph_);
       uint32_t alignment = datalen >= 4 ? 4 : datalen >= 2 ? 2 : 1;
@@ -224,8 +251,7 @@ static void dump_tensor_contents(std::ostream& out_, const TfLiteTensor& t,
     int outer_dim = t.dims->data[0];
     int middle_dim = t.dims->data[t.dims->size - 2];
     int inner_dim = t.dims->data[t.dims->size - 1];
-    for (int i = 1; i < t.dims->size - 2; ++i)
-      outer_dim *= t.dims->data[i];
+    for (int i = 1; i < t.dims->size - 2; ++i) outer_dim *= t.dims->data[i];
     for (int i = 0; i < outer_dim; ++i) {
       // out_ << "\n  ";
       // uint32_t outer_index = inner_dim * middle_dim;
diff --git a/src/CodeWriter.h b/src/CodeWriter.h
index 18a659b..e8c69b7 100644
--- a/src/CodeWriter.h
+++ b/src/CodeWriter.h
@@ -4,7 +4,6 @@
 #include <iostream>
 
 #include "tensorflow/lite/micro/micro_interpreter.h"
-#include "tensorflow/lite/version.h"
 
 namespace tflmc {
 
diff --git a/src/Compiler.cc b/src/Compiler.cc
index 7e5ffeb..bfdf0b4 100644
--- a/src/Compiler.cc
+++ b/src/Compiler.cc
@@ -1,19 +1,17 @@
-
 #include "Compiler.h"
 
-#include <memory>
 #include <fstream>
+#include <memory>
 #include <regex>
+#include <sstream>
 #include <vector>
 
 #include "CodeWriter.h"
 #include "CustomOperators.h"
-#include "RecordAllocations.h"
 #include "TypeToString.h"
-#include "tensorflow/lite/version.h"
 
 #ifndef SUFFICIENT_ARENA_SIZE
-#define SUFFICIENT_ARENA_SIZE (128*1024*1024)
+#define SUFFICIENT_ARENA_SIZE (128 * 1024 * 1024)
 #endif
 
 #if TF_LITE_PACKED_QUANTIZED_DATA_VERSION
@@ -22,6 +20,133 @@
 #endif
 #endif
 
+static std::vector<tflmc::Allocation> g_loggedAllocations;
+static int g_currentNodeIndex = -1;
+static uint8_t *g_arenaPtr = nullptr;
+static ptrdiff_t g_arena_size = 0;
+
+static void *LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx,
+                                             size_t bytes) {
+  void *ptr = tflite::GetMicroContext(ctx)->AllocatePersistentBuffer(bytes);
+  assert(ptr != nullptr && "Alloc failure");
+  g_loggedAllocations.push_back({-(g_arenaPtr - (uint8_t *)ptr + g_arena_size),
+                                 bytes, g_currentNodeIndex});
+  return ptr;
+}
+
+TfLiteStatus tflmc::AllocateTensors(
+    std::unique_ptr<tflite::MicroInterpreter> &interpreter) {
+  tflite::SubgraphAllocations *allocations =
+      interpreter->allocator_.StartModelAllocation(interpreter->model_);
+
+  if (allocations == nullptr) {
+    TF_LITE_REPORT_ERROR(interpreter->error_reporter_,
+                         "Failed starting model allocation.\n");
+    interpreter->initialization_status_ = kTfLiteError;
+    return kTfLiteError;
+  }
+
+  interpreter->graph_.SetSubgraphAllocations(allocations);
+
+  TF_LITE_ENSURE_STATUS(
+      interpreter->PrepareNodeAndRegistrationDataFromFlatbuffer());
+
+  // Only allow AllocatePersistentBuffer in Init stage.
+  interpreter->context_.AllocatePersistentBuffer =
+      &LoggingAllocatePersistentBuffer;
+  interpreter->context_.RequestScratchBufferInArena = nullptr;
+  interpreter->context_.GetScratchBuffer = nullptr;
+  interpreter->context_.GetExternalContext = nullptr;
+  TF_LITE_ENSURE_STATUS(interpreter->graph_.InitSubgraphs());
+
+  // Both AllocatePersistentBuffer and RequestScratchBufferInArena are
+  // available in Prepare stage.
+  interpreter->context_.RequestScratchBufferInArena =
+      tflite::MicroContextRequestScratchBufferInArena;
+  // external_context becomes available in Prepare stage.
+  interpreter->context_.GetExternalContext =
+      tflite::MicroContextGetExternalContext;
+
+  TF_LITE_ENSURE_STATUS(interpreter->graph_.PrepareSubgraphs());
+
+  // Prepare is done, we're ready for Invoke. Memory allocation is no longer
+  // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
+  interpreter->context_.AllocatePersistentBuffer = nullptr;
+  interpreter->context_.RequestScratchBufferInArena = nullptr;
+  interpreter->context_.GetScratchBuffer = tflite::MicroContextGetScratchBuffer;
+
+  TF_LITE_ENSURE_OK(
+      &interpreter->context_,
+      interpreter->allocator_.FinishModelAllocation(
+          interpreter->model_, interpreter->graph_.GetAllocations(),
+          &interpreter->scratch_buffer_handles_));
+
+  interpreter->micro_context_.SetScratchBufferHandles(
+      interpreter->scratch_buffer_handles_);
+
+  // TODO(b/162311891): Drop these allocations when the interpreter supports
+  // handling buffers from TfLiteEvalTensor.
+  interpreter->input_tensors_ = reinterpret_cast<TfLiteTensor **>(
+      interpreter->allocator_.AllocatePersistentBuffer(
+          sizeof(TfLiteTensor *) * interpreter->inputs_size()));
+  if (interpreter->input_tensors_ == nullptr) {
+    TF_LITE_REPORT_ERROR(
+        interpreter->error_reporter_,
+        "Failed to allocate memory for context->input_tensors_, "
+        "%d bytes required",
+        sizeof(TfLiteTensor *) * interpreter->inputs_size());
+    return kTfLiteError;
+  }
+
+  for (size_t i = 0; i < interpreter->inputs_size(); ++i) {
+    interpreter->input_tensors_[i] =
+        interpreter->allocator_.AllocatePersistentTfLiteTensor(
+            interpreter->model_, interpreter->graph_.GetAllocations(),
+            interpreter->inputs().Get(i), 0);
+    if (interpreter->input_tensors_[i] == nullptr) {
+      TF_LITE_REPORT_ERROR(interpreter->error_reporter_,
+                           "Failed to initialize input tensor %d", i);
+      return kTfLiteError;
+    }
+  }
+
+  // TODO(b/162311891): Drop these allocations when the interpreter supports
+  // handling buffers from TfLiteEvalTensor.
+  interpreter->output_tensors_ = reinterpret_cast<TfLiteTensor **>(
+      interpreter->allocator_.AllocatePersistentBuffer(
+          sizeof(TfLiteTensor *) * interpreter->outputs_size()));
+  if (interpreter->output_tensors_ == nullptr) {
+    TF_LITE_REPORT_ERROR(
+        interpreter->error_reporter_,
+        "Failed to allocate memory for context->output_tensors_, "
+        "%d bytes required",
+        sizeof(TfLiteTensor *) * interpreter->outputs_size());
+    return kTfLiteError;
+  }
+
+  for (size_t i = 0; i < interpreter->outputs_size(); ++i) {
+    interpreter->output_tensors_[i] =
+        interpreter->allocator_.AllocatePersistentTfLiteTensor(
+            interpreter->model_, interpreter->graph_.GetAllocations(),
+            interpreter->outputs().Get(i), 0);
+    if (interpreter->output_tensors_[i] == nullptr) {
+      TF_LITE_REPORT_ERROR(interpreter->error_reporter_,
+                           "Failed to initialize output tensor %d", i);
+      return kTfLiteError;
+    }
+  }
+
+  TF_LITE_ENSURE_STATUS(interpreter->ResetVariableTensors());
+
+  interpreter->tensors_allocated_ = true;
+  return kTfLiteOk;
+}
+
+TfLiteTensor *tflmc::GetTensor(tflite::MicroInterpreter *interpreter, int i) {
+  auto ctx = &interpreter->context_;
+  return ctx->GetTensor(ctx, i);
+}
+
 bool tflmc::CompileFile(const std::string &modelFileName,
                         const std::string &outFileName,
                         const std::string &prefix) {
@@ -108,13 +233,19 @@ bool tflmc::Compiler::init(const void *modelData) {
 
   // Build an interpreter to run the model with.
   arena_buf_.resize(SUFFICIENT_ARENA_SIZE);
+
+  g_arena_size = SUFFICIENT_ARENA_SIZE;
+  g_loggedAllocations.clear();  // Drop records left by a previous init().
+  g_arenaPtr = arena_buf_.data();
+
   interpreter_ = std::unique_ptr<tflite::MicroInterpreter>(
-      new tflite::MicroInterpreter(
-        model_, resolver_, arena_buf_.data(), arena_buf_.size(),
-        &microErrReporter_));
+      new tflite::MicroInterpreter(model_, resolver_, arena_buf_.data(),
+                                   arena_buf_.size(), &microErrReporter_));
+
+  assert(interpreter_->graph_.NumSubgraphs() == 1);
 
   // Allocate memory from the tensor_arena for the model's tensors.
-  TfLiteStatus allocate_status = interpreter_->AllocateTensors();
+  TfLiteStatus allocate_status = AllocateTensors(interpreter_);
   if (allocate_status != kTfLiteOk) {
     errReporter().Report("AllocateTensors() failed");
     return false;
@@ -155,6 +286,21 @@ bool tflmc::Compiler::init(const void *modelData) {
     }
   }
 
+  for (size_t k = 0; k < interpreter_->allocator_.scratch_buffer_request_count_;
+       k++) {
+    void *data = interpreter_->micro_context_.GetScratchBuffer(k);
+    ptrdiff_t offset = (uint8_t *)data - arena_buf_.data();
+    tflite::internal::ScratchBufferRequest *requests =
+        interpreter_->allocator_.GetScratchBufferRequests();
+    int bytes = requests[k].bytes;
+    ptrdiff_t highSize = offset + bytes;
+    ramTensorBufferSize = std::max(ramTensorBufferSize, highSize);
+    memMap_.recordRAM(offset, bytes,
+                      "Scratch_idx" + std::to_string((int)k) + "_op" +
+                          std::to_string((int)requests[k].node_idx));
+    scratchBufferOffsets.push_back(offset);
+  }
+
   for (size_t i = 0; i < interpreter_->operators_size(); i++) {
     auto nodeAndReg = interpreter_->node_and_registration(i);
     auto node = &nodeAndReg.node;
@@ -168,6 +314,9 @@ bool tflmc::Compiler::init(const void *modelData) {
     regInfo.code = code;
     if (code == tflite::BuiltinOperator_CUSTOM) {
       regInfo.custom_name = reg->custom_name;
+      if (regInfo.custom_name == "TFLite_Detection_PostProcess") {
+        has_tflite_custom_ops = true;
+      }
       has_custom_ops = true;
     }
     auto itOp =
@@ -180,13 +329,12 @@ bool tflmc::Compiler::init(const void *modelData) {
     nodes_.push_back(NodeInfo{*node, itOp - registrations_.begin()});
   }
 
-  auto runtimeAllocations = tflmc::RecordAllocations(model_, SUFFICIENT_ARENA_SIZE);
   ptrdiff_t minRuntimeOffset = 0;  // These are negative so zero start is fine.
-  for (const auto &alloc : runtimeAllocations) {
+  for (const auto &alloc : g_loggedAllocations) {
     minRuntimeOffset = std::min(minRuntimeOffset, alloc.offset);
   }
   size_t totalRuntimeAllocSize = 0;
-  for (const auto &alloc : runtimeAllocations) {
+  for (const auto &alloc : g_loggedAllocations) {
     // TODO: This drops the alignment between buffers. Is this fine?
     totalRuntimeAllocSize += alloc.len;
     ptrdiff_t offset = alloc.offset - minRuntimeOffset + ramTensorBufferSize;
@@ -220,9 +368,15 @@ void tflmc::Compiler::writeSource(std::ostream &out) {
   CodeWriter wr(out, subgraph_);
 
   wr << R"(
+
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
+#include "tensorflow/lite/micro/kernels/fully_connected.h"
 #include "tensorflow/lite/micro/kernels/micro_ops.h"
+#include "tensorflow/lite/micro/kernels/reduce.h"
+#include "tensorflow/lite/micro/kernels/softmax.h"
+#include "tensorflow/lite/micro/micro_context.h"
 
 #if defined __GNUC__
 #define ALIGN(X) __attribute__((aligned(X)))
@@ -240,8 +394,9 @@ namespace ops {
 namespace micro {
 )";
     for (size_t i = 0; i < registrations_.size(); i++) {
-      if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM) {
-        wr << "extern TfLiteRegistration Register_"
+      if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM &&
+          registrations_[i].custom_name != "TFLite_Detection_PostProcess") {
+        wr << "extern TfLiteRegistration *Register_"
            << registrations_[i].custom_name << "(void);\n";
       }
     }
@@ -249,6 +404,20 @@ namespace micro {
 }  // namespace ops
 }  // namespace tflite
 
+)";
+  }
+  if (has_tflite_custom_ops) {
+    wr << R"(namespace tflite {
+)";
+    for (size_t i = 0; i < registrations_.size(); i++) {
+      if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM &&
+          registrations_[i].custom_name == "TFLite_Detection_PostProcess") {
+        wr << "extern TfLiteRegistration "
+              "*Register_DETECTION_POSTPROCESS(void);\n";
+      }
+    }
+    wr << R"(} // namespace tflite
+
 )";
   }
   wr << R"(namespace {
@@ -316,7 +485,8 @@ TfLiteNode tflNodes[)"
     wr.writeIntArray(*t->dims, "tensor_dimension" + std::to_string(i));
     wr.writeQuantization(t->quantization, "quant" + std::to_string(i));
 #if TF_LITE_PACKED_QUANTIZED_DATA_VERSION
-    wr.writeQuantizationDetails(t->quantization, "quant_details" + std::to_string(i));
+    wr.writeQuantizationDetails(t->quantization,
+                                "quant_details" + std::to_string(i));
 #endif
   }
   for (size_t i = 0; i < nodes_.size(); i++) {
@@ -409,6 +579,21 @@ TfLiteNode tflNodes[)"
   // TODO: This code assumes that persistent allocations are made from the end
   // (which is true for the current implementation)
   wr << R"(
+
+// Scratch buffer variables
+int scratch_buffer_idx = 0;
+const int scratch_buffer_offsets[)"
+     << std::max<size_t>(scratchBufferOffsets.size(), 1) << R"(] = { )";
+  if (scratchBufferOffsets.empty()) {
+    wr << 0;  // Dummy entry: a zero-length array is not valid ISO C++.
+  }
+  for (size_t i = 0; i < scratchBufferOffsets.size(); i++) {
+    wr << (i == 0 ? "" : ", ") << scratchBufferOffsets[i];
+  }
+  wr << R"( };
+tflite::MicroContext mc;
+
+// Functions to be used as function pointers for TfLiteContext and MicroContext
 static void* AllocatePersistentBuffer(struct TfLiteContext* ctx,
                                                   size_t bytes) {
   static uint8_t *AllocPtr = tensor_arena + sizeof(tensor_arena);
@@ -421,12 +606,51 @@ static TfLiteEvalTensor *GetEvalTensor(const struct TfLiteContext *context,
                                        int tensor_idx) {
   return &evalTensors[tensor_idx];
 }
+
+static TfLiteStatus RequestScratchBufferInArena(struct TfLiteContext *context, size_t bytes,
+                                       int *buffer_idx) {
+  *buffer_idx = scratch_buffer_idx++;
+  return kTfLiteOk;
+};
+
+static void *GetScratchBuffer(struct TfLiteContext *context,
+                                       int buffer_idx) {
+  return tensor_arena + scratch_buffer_offsets[buffer_idx];
+}
+
+static TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node, int index) {
+      return &ctx.tensors[node->inputs->data[index]];
+}
+
+static TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node, int index) {
+      return &ctx.tensors[node->outputs->data[index]];
+}
+
+static void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
+}
+
+static void* external_context() {
+  return nullptr;
+}
+
 } // namespace
 
 TfLiteStatus )"
      << prefix_ << R"(init() {
+  // Setup microcontext functions
+  mc.AllocateTempInputTensor = &AllocateTempInputTensor;
+  mc.AllocateTempOutputTensor = &AllocateTempOutputTensor;
+  mc.DeallocateTempTfLiteTensor = &DeallocateTempTfLiteTensor;
+  mc.external_context = &external_context;
+
+  // Setup tflitecontext functions
   ctx.AllocatePersistentBuffer = &AllocatePersistentBuffer;
   ctx.GetEvalTensor = &GetEvalTensor;
+  ctx.RequestScratchBufferInArena = &RequestScratchBufferInArena;
+  ctx.GetScratchBuffer = &GetScratchBuffer;
+
+  // Set microcontext as the context ptr
+  ctx.impl_ = (void*)&mc;
   ctx.tensors = tflTensors;
 )";
   wr << "  ctx.tensors_size = " << tensors_.size() << ";\n";
@@ -474,11 +698,44 @@ TfLiteStatus )"
     std::string opName;
     if (registrations_[i].code == tflite::BuiltinOperator_CUSTOM) {
       opName = registrations_[i].custom_name;
+      if (opName == "TFLite_Detection_PostProcess") {
+        wr << "  registrations[OP_" << opName
+           << "] = *(tflite::Register_DETECTION_POSTPROCESS());\n";
+      } else {
+        // Other custom ops are declared as pointer-returning factories in
+        // tflite::ops::micro, so dereference the result.
+        wr << "  registrations[OP_" << opName
+           << "] = *(tflite::ops::micro::Register_" << opName << "());\n";
+      }
+    } else if ((registrations_[i].code == tflite::BuiltinOperator_ADD) ||
+               (registrations_[i].code ==
+                tflite::BuiltinOperator_AVERAGE_POOL_2D) ||
+               (registrations_[i].code == tflite::BuiltinOperator_CONV_2D) ||
+               (registrations_[i].code ==
+                tflite::BuiltinOperator_DEPTHWISE_CONV_2D) ||
+               (registrations_[i].code == tflite::BuiltinOperator_DEQUANTIZE) ||
+               (registrations_[i].code ==
+                tflite::BuiltinOperator_FULLY_CONNECTED) ||
+               (registrations_[i].code == tflite::BuiltinOperator_LOGISTIC) ||
+               (registrations_[i].code ==
+                tflite::BuiltinOperator_MAX_POOL_2D) ||
+               (registrations_[i].code == tflite::BuiltinOperator_MEAN) ||
+               (registrations_[i].code == tflite::BuiltinOperator_MUL) ||
+               (registrations_[i].code == tflite::BuiltinOperator_PRELU) ||
+               (registrations_[i].code == tflite::BuiltinOperator_QUANTIZE) ||
+               (registrations_[i].code == tflite::BuiltinOperator_RELU) ||
+               (registrations_[i].code == tflite::BuiltinOperator_SHAPE) ||
+               (registrations_[i].code == tflite::BuiltinOperator_SOFTMAX) ||
+               (registrations_[i].code ==
+                tflite::BuiltinOperator_TRANSPOSE_CONV)) {
+      opName = tflite::EnumNameBuiltinOperator(registrations_[i].code);
+      wr << "  registrations[OP_" << opName << "] = tflite::Register_" << opName
+         << "();\n";
     } else {
       opName = tflite::EnumNameBuiltinOperator(registrations_[i].code);
+      wr << "  registrations[OP_" << opName
+         << "] = tflite::ops::micro::Register_" << opName << "();\n";
     }
-    wr << "  registrations[OP_" << opName << "] = tflite::ops::micro::Register_"
-       << opName << "();\n";
   }
   wr << "\n";
   wr << "  for(size_t i = 0; i < " << nodes_.size() << R"(; ++i) {
diff --git a/src/Compiler.h b/src/Compiler.h
index 3a69b27..6e5a21d 100644
--- a/src/Compiler.h
+++ b/src/Compiler.h
@@ -1,16 +1,26 @@
 #ifndef TFLMCOMPILER_COMPILER_H
 #define TFLMCOMPILER_COMPILER_H
 
-#include <iostream>
-
 #include "MemMap.h"
 #include "tensorflow/lite/micro/all_ops_resolver.h"
 #include "tensorflow/lite/micro/micro_error_reporter.h"
+#define private public
 #include "tensorflow/lite/micro/micro_interpreter.h"
+#undef private
 #include "tensorflow/lite/schema/schema_generated.h"
 
 namespace tflmc {
 
+struct Allocation {
+  ptrdiff_t offset;
+  size_t len;
+  int nodeIndex;
+};
+
+TfLiteStatus AllocateTensors(
+    std::unique_ptr<tflite::MicroInterpreter> &interpreter);
+TfLiteTensor *GetTensor(tflite::MicroInterpreter *interpreter, int i);
+
 bool CompileFile(const std::string &modelFileName,
                  const std::string &outFileName,
                  const std::string &prefix = "model_");
@@ -33,9 +43,7 @@ class Compiler {
 
  private:
   struct TensorInfo {
-    TensorInfo(const TfLiteTensor *tensor_ptr) :
-      tensor(tensor_ptr)
-    {}
+    TensorInfo(const TfLiteTensor *tensor_ptr) : tensor(tensor_ptr) {}
     const TfLiteTensor *tensor = nullptr;
   };
   struct RegistrationInfo {
@@ -52,10 +60,8 @@ class Compiler {
   };
   struct NodeInfo {
     NodeInfo() {}
-    NodeInfo(TfLiteNode tfl_node, ptrdiff_t reg_index) :
-      node(tfl_node),
-      regIndex(reg_index)
-    {}
+    NodeInfo(TfLiteNode tfl_node, ptrdiff_t reg_index)
+        : node(tfl_node), regIndex(reg_index) {}
     TfLiteNode node;
     ptrdiff_t regIndex = -1;
   };
@@ -89,8 +95,10 @@ class Compiler {
   std::vector<NodeInfo> nodes_;
   std::vector<int32_t> inputTensorIndices_;
   std::vector<int32_t> outputTensorIndices_;
+  std::vector<int32_t> scratchBufferOffsets;
 
   bool has_custom_ops = false;
+  bool has_tflite_custom_ops = false;
   bool has_quantization = false;
   Option<TfLiteType> common_tensor_type;
   Option<bool> common_tensor_is_variable;
diff --git a/src/MemMap.h b/src/MemMap.h
index 55295b1..61258de 100644
--- a/src/MemMap.h
+++ b/src/MemMap.h
@@ -1,9 +1,9 @@
 #ifndef TFLMCOMPILER_MEMMAP_H
 #define TFLMCOMPILER_MEMMAP_H
 
+#include <cstddef>
 #include <string>
 #include <vector>
-#include <cstddef>
 
 namespace tflmc {
 
diff --git a/src/RecordAllocations.cc b/src/RecordAllocations.cc
deleted file mode 100644
index dc7b3ce..0000000
--- a/src/RecordAllocations.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-#include <sstream>
-#define private public
-#include "tensorflow/lite/micro/micro_interpreter.h"
-#undef private
-
-#include "CustomOperators.h"
-#include "RecordAllocations.h"
-#include "tensorflow/lite/micro/all_ops_resolver.h"
-#include "tensorflow/lite/micro/micro_error_reporter.h"
-
-static std::vector<tflmc::Allocation> g_loggedAllocations;
-static tflite::MicroAllocator *g_allocator;
-static int g_currentNodeIndex = -1;
-static uint8_t *g_arenaPtr = nullptr;
-
-static ptrdiff_t g_arena_size = 0;
-
-static void* LoggingAllocatePersistentBuffer(struct TfLiteContext *ctx,
-                                                    size_t bytes) {
-  void* ptr = g_allocator->AllocatePersistentBuffer(bytes);
-  assert(ptr!=nullptr && "Alloc failure");
-  g_loggedAllocations.push_back(
-      {-(g_arenaPtr - (uint8_t *)ptr + g_arena_size), bytes,
-       g_currentNodeIndex});
-  return ptr;
-}
-static TfLiteStatus LoggingRequestScratchBufferInArena(TfLiteContext *ctx,
-                                                       size_t bytes,
-                                                       int *buffer_idx) {
-  assert(false && "Not handling scratch buffers currently");
-  return g_allocator->RequestScratchBufferInArena(g_currentNodeIndex, bytes,
-                                                  buffer_idx);
-}
-
-std::vector<tflmc::Allocation> tflmc::RecordAllocations(
-    const tflite::Model *model, ptrdiff_t arena_size) {
-  g_arena_size = arena_size;
-  std::vector<uint8_t> arena_buf(g_arena_size);
-  g_arenaPtr = arena_buf.data();
-
-  tflite::MicroErrorReporter error_reporter;
-  tflite::AllOpsResolver resolver;
-  tflmc::custom_operator_handle custom = tflmc::LoadCustom(&resolver);
-  tflite::MicroInterpreter interpreter(model, resolver, arena_buf.data(),
-                                       g_arena_size, &error_reporter);
-
-  auto ctx = &interpreter.context_;
-  auto allocator = &interpreter.allocator_;
-
-  tflite::NodeAndRegistration *nodeAndRegs;
-  TfLiteEvalTensor *eval_tensors=nullptr;
-  allocator->StartModelAllocation(model, resolver, &nodeAndRegs, &eval_tensors);
-  allocator->FinishModelAllocation(model, eval_tensors);
-
-  g_allocator = allocator;
-  ctx->AllocatePersistentBuffer = &LoggingAllocatePersistentBuffer;
-  ctx->RequestScratchBufferInArena = nullptr;
-  ctx->GetScratchBuffer = nullptr;
-
-  auto subgraph = model->subgraphs()->Get(0);
-  for (size_t i = 0; i < subgraph->operators()->size(); i++) {
-    auto node = &nodeAndRegs[i].node;
-    auto reg = nodeAndRegs[i].registration;
-    if (reg->init) {
-      g_currentNodeIndex = i;
-      node->user_data = reg->init(ctx, (const char *)node->builtin_data, 0);
-    }
-  }
-
-  ctx->RequestScratchBufferInArena = &LoggingRequestScratchBufferInArena;
-
-  for (size_t i = 0; i < subgraph->operators()->size(); i++) {
-    auto node = &nodeAndRegs[i].node;
-    auto reg = nodeAndRegs[i].registration;
-    if (reg->prepare) {
-      g_currentNodeIndex = i;
-      reg->prepare(ctx, node);
-    }
-  }
-  tflmc::UnloadCustom(custom);
-  return g_loggedAllocations;
-}
-
-TfLiteEvalTensor *tflmc::GetEvalTensor(tflite::MicroInterpreter *interpreter, int i) {
-  auto ctx = &interpreter->context_;
-  return ctx->GetEvalTensor(ctx, i);
-}
-
-TfLiteTensor *tflmc::GetTensor(tflite::MicroInterpreter *interpreter, int i) {
-  auto ctx = &interpreter->context_;
-  return ctx->GetTensor(ctx, i);
-}
diff --git a/src/RecordAllocations.h b/src/RecordAllocations.h
deleted file mode 100644
index a8848ac..0000000
--- a/src/RecordAllocations.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef TFLMCOMPILER_RECORDALLOCATIONS_H
-#define TFLMCOMPILER_RECORDALLOCATIONS_H
-
-#include "tensorflow/lite/schema/schema_generated.h"
-#include <cinttypes>
-
-namespace tflmc {
-
-struct Allocation {
-  ptrdiff_t offset;
-  size_t len;
-  int nodeIndex;
-};
-
-std::vector<Allocation> RecordAllocations(const tflite::Model *model, ptrdiff_t arena_size);
-
-
-TfLiteEvalTensor *GetEvalTensor(tflite::MicroInterpreter *interpreter, int i);
-TfLiteTensor *GetTensor(tflite::MicroInterpreter *interpreter, int i);
-
-}  // namespace tflmc
-
-#endif
diff --git a/src/TypeToString.cc b/src/TypeToString.cc
index 2747904..57a4f8c 100644
--- a/src/TypeToString.cc
+++ b/src/TypeToString.cc
@@ -67,7 +67,7 @@ std::string tflmc::to_string(TfLiteFusedActivation t) {
   switch (t) {
     NAME(kTfLiteActNone);
     NAME(kTfLiteActRelu);
-    NAME(kTfLiteActRelu1);
+    NAME(kTfLiteActReluN1To1);
     NAME(kTfLiteActRelu6);
     NAME(kTfLiteActTanh);
     NAME(kTfLiteActSignBit);