Merge branch 'Samsung:master' into master
jyoungyun authored May 24, 2024
2 parents a94b4b8 + b4b9349 commit ce042be
Showing 171 changed files with 22,898 additions and 1,064 deletions.
9 changes: 7 additions & 2 deletions Makefile.template
@@ -9,6 +9,7 @@ TARGET_OS?=linux
COVERAGE_BUILD?=0
OPTIONS?=
OPTIONS_NNCC?=
INSTALL_OPTIONS?=

# make TARGET and TYPE to lowercase
HOST_ARCH_LC=$(shell echo $(HOST_ARCH) | tr A-Z a-z)
@@ -82,6 +83,10 @@ else
NPROCS?=1
endif

ifeq ($(BUILD_TYPE_LC),release)
INSTALL_OPTIONS+= --strip
endif

WORKHOME=$(CURDIR)/Product
WORKFOLDER=$(TARGET_ARCH_LC)-$(TARGET_OS).$(BUILD_TYPE_LC)
WORKSPACE=$(WORKHOME)/$(WORKFOLDER)
@@ -160,7 +165,7 @@ ifeq (,$(findstring android,$(TARGET_OS)))
-DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle08;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" \
$(OPTIONS_NNCC)
./nncc build -j$(NPROCS)
cmake --install $(NNCC_FOLDER)
cmake --install $(NNCC_FOLDER) $(INSTALL_OPTIONS)
# install angkor TensorIndex and oops InternalExn header (TODO: Remove this)
@mkdir -p ${OVERLAY_FOLDER}/include/nncc/core/ADT/tensor
@mkdir -p ${OVERLAY_FOLDER}/include/oops
@@ -184,7 +189,7 @@ build_internal:
./nnfw build -j $(NPROCS)

install_internal:
./nnfw install --prefix $(INSTALL_PATH)
./nnfw install --prefix $(INSTALL_PATH) $(INSTALL_OPTIONS)
rm -rf $(INSTALL_ALIAS)
ln -s $(INSTALL_PATH) $(INSTALL_ALIAS)

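With this change, a release build installs stripped binaries: when BUILD_TYPE is "release", --strip is appended to INSTALL_OPTIONS and forwarded to both "cmake --install" (the nncc overlay install) and "./nnfw install", so debug symbols are dropped at install time. The invocation itself is presumably unchanged (something like "make all install BUILD_TYPE=release", assuming the template's usual targets).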
3 changes: 2 additions & 1 deletion compiler/circle-interpreter/src/CircleInterpreter.cpp
@@ -136,7 +136,8 @@ int entry(int argc, char **argv)
for (int i = 0; i < module->graph()->outputs()->size(); i++)
{
const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
std::vector<char> output_data(getTensorSize(output_node));
size_t output_size = interpreter.getOutputTensorSize(output_node);
std::vector<char> output_data(output_size);
interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());

// Output data is written in ${output_file}n
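Note that the buffer is now sized by asking the interpreter for the runtime output tensor's size rather than computing it from the output node itself; presumably this keeps the buffer consistent with the tensor actually allocated at execution time, and lets the interpreter raise a clear error when the tensor is missing (see getOutputTensorSize below).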
6 changes: 6 additions & 0 deletions compiler/circle2circle/src/Circle2Circle.cpp
@@ -81,6 +81,7 @@ int entry(int argc, char **argv)
add_switch(arser, "--fold_fully_connected",
"This will fold FullyConnected operator with constant inputs");
add_switch(arser, "--fold_gather", "This will fold Gather operator");
add_switch(arser, "--fold_reshape", "This will fold Reshape operator");
add_switch(arser, "--fold_shape", "This will fold Shape operator");
add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
add_switch(arser, "--fold_squeeze", "This will fold Squeeze operator");
@@ -124,6 +125,7 @@ int entry(int argc, char **argv)
"This will fuse BatchNorm operators of pre-activations to Convolution operator");
add_switch(arser, "--fuse_prelu", "This will fuse operators to PReLU operator");
add_switch(arser, "--fuse_gelu", "This will fuse operators to GeLU operator");
add_switch(arser, "--fuse_rsqrt", "This will fuse operators to Rsqrt operator");
add_switch(arser, "--remove_duplicate_const", "This will remove all duplicate constant nodes");
add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
add_switch(arser, "--remove_gather_guard",
@@ -271,6 +273,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FoldFullyConnected);
if (arser.get<bool>("--fold_gather"))
options->enable(Algorithms::FoldGather);
if (arser.get<bool>("--fold_reshape"))
options->enable(Algorithms::FoldReshape);
if (arser.get<bool>("--fold_shape"))
options->enable(Algorithms::FoldShape);
if (arser.get<bool>("--fold_sparse_to_dense"))
@@ -317,6 +321,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FusePRelu);
if (arser.get<bool>("--fuse_gelu"))
options->enable(Algorithms::FuseGelu);
if (arser.get<bool>("--fuse_rsqrt"))
options->enable(Algorithms::FuseRsqrt);
if (arser.get<bool>("--fuse_transpose_with_mean"))
options->enable(Algorithms::FuseTransposeWithMean);
if (arser.get<bool>("--remove_duplicate_const"))
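Both new switches follow the existing pattern: each flag enables the matching Algorithms entry, which CircleOptimizer later turns into a pass (see CircleOptimizer.cpp below). From the command line this would presumably look like "circle2circle --fold_reshape --fuse_rsqrt input.circle output.circle" (positional input/output arguments assumed from the tool's usual usage).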
2 changes: 2 additions & 0 deletions compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -60,6 +60,8 @@ class Interpreter

void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);

size_t getOutputTensorSize(const luci::CircleOutput *output_node);

void interpret();

void attachObserver(ExecutionObserver *observer);
Expand Down
14 changes: 14 additions & 0 deletions compiler/luci-interpreter/src/Interpreter.cpp
@@ -125,6 +125,20 @@ void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *
tensor->readData(data, data_size);
}

size_t Interpreter::getOutputTensorSize(const luci::CircleOutput *output_node)
{
Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
if (tensor == nullptr)
{
const std::string &name = output_node->name();
throw std::runtime_error("Cannot find tensor size for output node named \"" + name + "\".");
}

size_t tensor_size = luci_interpreter::getDataTypeSize(tensor->element_type());
tensor_size *= tensor->shape().num_elements();
return tensor_size;
}

void Interpreter::interpret() { _runtime_module->execute(); }

void Interpreter::attachObserver(ExecutionObserver *observer)
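Taken together with the CircleInterpreter.cpp change above, the caller-side pattern looks roughly like the sketch below. This is not code from the commit; it assumes the luci_interpreter::Interpreter API shown in these hunks and a vector of output nodes obtained as in CircleInterpreter.cpp.

#include <vector>

#include <loco.h>
#include <luci/IR/CircleNodes.h>
#include <luci_interpreter/Interpreter.h>

// Size each output buffer from the interpreter's runtime tensor, then
// copy the data out. `output_nodes` is assumed to come from
// loco::output_nodes(module->graph()), as in CircleInterpreter.cpp.
void read_all_outputs(luci_interpreter::Interpreter &interpreter,
                      const std::vector<loco::Node *> &output_nodes)
{
  for (auto *node : output_nodes)
  {
    const auto *output_node = loco::must_cast<const luci::CircleOutput *>(node);
    std::vector<char> output_data(interpreter.getOutputTensorSize(output_node));
    interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
  }
}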
2 changes: 2 additions & 0 deletions compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -61,6 +61,7 @@ class CircleOptimizer final
FoldFullyConnected,
FoldDequantize,
FoldGather,
FoldReshape,
FoldShape,
FoldSparseToDense,
FoldSqueeze,
@@ -72,6 +73,7 @@
FuseActivationFunction,
FusePRelu,
FuseGelu,
FuseRsqrt,
ShuffleWeightTo16x1Float32,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
38 changes: 38 additions & 0 deletions compiler/luci/pass/include/luci/Pass/FoldReshapePass.h
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __LUCI_FOLD_RESHAPE_PASS_H__
#define __LUCI_FOLD_RESHAPE_PASS_H__

#include <logo/Pass.h>

namespace luci
{

/**
* @brief Class to fold Reshape to a constant tensor
*
*/
struct FoldReshapePass final : public logo::Pass
{
const char *name(void) const final { return "luci::FoldReshapePass"; }

bool run(loco::Graph *g) final;
};

} // namespace luci

#endif // __LUCI_FOLD_RESHAPE_PASS_H__
37 changes: 37 additions & 0 deletions compiler/luci/pass/include/luci/Pass/FuseRsqrtPass.h
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __LUCI_FUSE_RSQRT_PASS_H__
#define __LUCI_FUSE_RSQRT_PASS_H__

#include <logo/Pass.h>

namespace luci
{

/**
* @brief Class to fuse certain pattern of subgraph into CircleRsqrt
*/
struct FuseRsqrtPass final : public logo::Pass
{
const char *name(void) const final { return "luci::FuseRsqrtPass"; }

bool run(loco::Graph *g) final;
};

} // namespace luci

#endif // __LUCI_FUSE_RSQRT_PASS_H__
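The matching FuseRsqrtPass.cpp is not among the hunks shown on this page; judging from the brief above, the "certain pattern" is presumably a reciprocal-of-square-root subgraph (for example Div(1, Sqrt(x))) rewritten into a single CircleRsqrt node.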
10 changes: 10 additions & 0 deletions compiler/luci/pass/src/CircleOptimizer.cpp
@@ -26,6 +26,7 @@
#include "luci/Pass/FoldDequantizePass.h"
#include "luci/Pass/FoldFullyConnectedPass.h"
#include "luci/Pass/FoldGatherPass.h"
#include "luci/Pass/FoldReshapePass.h"
#include "luci/Pass/FoldShapePass.h"
#include "luci/Pass/FoldSparseToDensePass.h"
#include "luci/Pass/FoldSqueezePass.h"
@@ -46,6 +47,7 @@
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "luci/Pass/FusePReluPass.h"
#include "luci/Pass/FuseGeluPass.h"
#include "luci/Pass/FuseRsqrtPass.h"
#include "luci/Pass/FuseSliceWithTConvPass.h"
#include "luci/Pass/FuseHorizontalFullyConnectedPass.h"
#include "luci/Pass/FuseTransposeWithMeanPass.h"
@@ -338,6 +340,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<FuseGeluPass>());
}
if (_options->query(Options::Algorithm::FuseRsqrt))
{
phase.emplace_back(std::make_unique<FuseRsqrtPass>());
}
if (_options->query(Options::Algorithm::FuseHorizontalFullyConnected))
{
phase.emplace_back(std::make_unique<FuseHorizontalFullyConnectedPass>());
@@ -374,6 +380,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
}
if (_options->query(Options::Algorithm::FoldReshape))
{
phase.emplace_back(std::make_unique<luci::FoldReshapePass>());
}
if (_options->query(Options::Algorithm::FoldShape))
{
phase.emplace_back(std::make_unique<luci::FoldShapePass>());
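For code that drives the optimizer directly rather than through the circle2circle CLI, enabling the two new algorithms would presumably look like the sketch below (not from the commit; it assumes the usual options()/optimize() entry points of luci::CircleOptimizer).

#include <loco.h>
#include <luci/CircleOptimizer.h>

// Enable the new fold/fuse algorithms, then run the optimizer; during
// optimize() the enabled entries are turned into FoldReshapePass and
// FuseRsqrtPass instances, as registered above.
void optimize_graph(loco::Graph *g)
{
  luci::CircleOptimizer optimizer;
  auto options = optimizer.options();
  options->enable(luci::CircleOptimizer::Options::Algorithm::FoldReshape);
  options->enable(luci::CircleOptimizer::Options::Algorithm::FuseRsqrt);
  optimizer.optimize(g);
}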
126 changes: 126 additions & 0 deletions compiler/luci/pass/src/FoldReshapePass.cpp
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "luci/Pass/FoldReshapePass.h"

#include <luci/IR/CircleNodes.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Service/Nodes/CircleConst.h>

namespace
{

/**
* Fold Reshape to const if it has const input
**/
bool fold_reshape(luci::CircleReshape *reshape)
{
// Check const input
auto const_input = dynamic_cast<luci::CircleConst *>(reshape->tensor());
if (not const_input)
return false;

// Check const shape
auto const_shape = dynamic_cast<luci::CircleConst *>(reshape->shape());
if (not const_shape)
return false;

// Check all dimensions are known
const auto input_rank = const_input->rank();
for (uint32_t i = 0; i < input_rank; i++)
{
if (not const_input->dim(i).known())
return false;
}

// Check the new shape is a rank-1 tensor with known dimension
const auto shape_rank = const_shape->rank();
if (shape_rank != 1)
return false;

if (not const_shape->dim(0).known())
return false;

std::vector<uint32_t> new_shape;
switch (const_shape->dtype())
{
case loco::DataType::S32:
for (uint32_t i = 0; i < const_shape->size<loco::DataType::S32>(); i++)
{
const auto val = const_shape->at<loco::DataType::S32>(i);
if (val < 0)
return false;

new_shape.push_back(static_cast<uint32_t>(val));
}
break;
// TODO Support S64
default:
return false;
}

if (auto input_qparam = const_input->quantparam())
{
// Only support per-tensor quantization
if (input_qparam->scale.size() != 1)
return false;

if (input_qparam->zerop.size() != 1)
return false;
}

auto new_const = luci::clone(const_input);
new_const->rank(new_shape.size());
for (uint32_t i = 0; i < new_shape.size(); i++)
{
new_const->dim(i).set(new_shape[i]);
}

new_const->shape_status(luci::ShapeStatus::VALID);

new_const->name(const_input->name() + "_reshaped");
luci::add_origin(
new_const, luci::composite_origin({luci::get_origin(reshape), luci::get_origin(const_input)}));

loco::replace(reshape).with(new_const);

return true;
}

} // namespace

namespace luci
{

/**
* Constant Folding for Reshape Op
**/
bool FoldReshapePass::run(loco::Graph *g)
{
bool changed = false;
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
{
if (fold_reshape(reshape))
changed = true;
}
}

return changed;
}

} // namespace luci
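To make the folded pattern concrete, here is a minimal, test-style sketch (not part of the commit; it assumes luci's usual graph-construction API and omits the graph-output wiring a real test needs so the pass traversal can reach the node).

#include <loco.h>
#include <luci/IR/CircleNodes.h>
#include <luci/Pass/FoldReshapePass.h>

// Build const(2x3, FLOAT32) --tensor--> Reshape <--shape-- const([6], S32),
// then run the pass; the Reshape is replaced by a rank-1 [6] constant.
void fold_reshape_example(void)
{
  loco::Graph g;

  auto data = g.nodes()->create<luci::CircleConst>();
  data->dtype(loco::DataType::FLOAT32);
  data->rank(2);
  data->dim(0).set(2);
  data->dim(1).set(3);
  data->size<loco::DataType::FLOAT32>(6); // allocate 2 * 3 elements

  auto shape = g.nodes()->create<luci::CircleConst>();
  shape->dtype(loco::DataType::S32);
  shape->rank(1);
  shape->dim(0).set(1);
  shape->size<loco::DataType::S32>(1);
  shape->at<loco::DataType::S32>(0) = 6; // requested shape: [6]

  auto reshape = g.nodes()->create<luci::CircleReshape>();
  reshape->tensor(data);
  reshape->shape(shape);
  // ... wire `reshape` to a CircleOutput / graph output here (omitted) ...

  luci::FoldReshapePass pass;
  pass.run(&g); // fold_reshape() replaces the Reshape with the new const
}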