Merge branch 'Samsung:master' into master
jyoungyun authored May 24, 2024
2 parents a94b4b8 + b4b9349 commit ce042be
Showing 171 changed files with 22,898 additions and 1,064 deletions.
9 changes: 7 additions & 2 deletions Makefile.template
@@ -9,6 +9,7 @@ TARGET_OS?=linux
COVERAGE_BUILD?=0
OPTIONS?=
OPTIONS_NNCC?=
INSTALL_OPTIONS?=

# make TARGET and TYPE to lowercase
HOST_ARCH_LC=$(shell echo $(HOST_ARCH) | tr A-Z a-z)
@@ -82,6 +83,10 @@ else
NPROCS?=1
endif

ifeq ($(BUILD_TYPE_LC),release)
INSTALL_OPTIONS+= --strip
endif

WORKHOME=$(CURDIR)/Product
WORKFOLDER=$(TARGET_ARCH_LC)-$(TARGET_OS).$(BUILD_TYPE_LC)
WORKSPACE=$(WORKHOME)/$(WORKFOLDER)
@@ -160,7 +165,7 @@ ifeq (,$(findstring android,$(TARGET_OS)))
-DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle08;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" \
$(OPTIONS_NNCC)
./nncc build -j$(NPROCS)
cmake --install $(NNCC_FOLDER)
cmake --install $(NNCC_FOLDER) $(INSTALL_OPTIONS)
# install angkor TensorIndex and oops InternalExn header (TODO: Remove this)
@mkdir -p ${OVERLAY_FOLDER}/include/nncc/core/ADT/tensor
@mkdir -p ${OVERLAY_FOLDER}/include/oops
@@ -184,7 +189,7 @@ build_internal:
./nnfw build -j $(NPROCS)

install_internal:
./nnfw install --prefix $(INSTALL_PATH)
./nnfw install --prefix $(INSTALL_PATH) $(INSTALL_OPTIONS)
rm -rf $(INSTALL_ALIAS)
ln -s $(INSTALL_PATH) $(INSTALL_ALIAS)

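With this change, a release build installs stripped binaries: when BUILD_TYPE is "release", --strip is appended to INSTALL_OPTIONS and forwarded to both "cmake --install" (the nncc overlay install) and "./nnfw install", so debug symbols are dropped at install time. The invocation itself is presumably unchanged (something like "make all install BUILD_TYPE=release", assuming the template's usual targets).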
3 changes: 2 additions & 1 deletion compiler/circle-interpreter/src/CircleInterpreter.cpp
@@ -136,7 +136,8 @@ int entry(int argc, char **argv)
for (int i = 0; i < module->graph()->outputs()->size(); i++)
{
const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
std::vector<char> output_data(getTensorSize(output_node));
size_t output_size = interpreter.getOutputTensorSize(output_node);
std::vector<char> output_data(output_size);
interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());

// Output data is written in ${output_file}n
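Note that the buffer is now sized by asking the interpreter for the runtime output tensor's size rather than computing it from the output node itself; presumably this keeps the buffer consistent with the tensor actually allocated at execution time, and lets the interpreter raise a clear error when the tensor is missing (see getOutputTensorSize below).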
6 changes: 6 additions & 0 deletions compiler/circle2circle/src/Circle2Circle.cpp
@@ -81,6 +81,7 @@ int entry(int argc, char **argv)
add_switch(arser, "--fold_fully_connected",
"This will fold FullyConnected operator with constant inputs");
add_switch(arser, "--fold_gather", "This will fold Gather operator");
add_switch(arser, "--fold_reshape", "This will fold Reshape operator");
add_switch(arser, "--fold_shape", "This will fold Shape operator");
add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
add_switch(arser, "--fold_squeeze", "This will fold Squeeze operator");
@@ -124,6 +125,7 @@ int entry(int argc, char **argv)
"This will fuse BatchNorm operators of pre-activations to Convolution operator");
add_switch(arser, "--fuse_prelu", "This will fuse operators to PReLU operator");
add_switch(arser, "--fuse_gelu", "This will fuse operators to GeLU operator");
add_switch(arser, "--fuse_rsqrt", "This will fuse operators to Rsqrt operator");
add_switch(arser, "--remove_duplicate_const", "This will remove all duplicate constant nodes");
add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
add_switch(arser, "--remove_gather_guard",
@@ -271,6 +273,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FoldFullyConnected);
if (arser.get<bool>("--fold_gather"))
options->enable(Algorithms::FoldGather);
if (arser.get<bool>("--fold_reshape"))
options->enable(Algorithms::FoldReshape);
if (arser.get<bool>("--fold_shape"))
options->enable(Algorithms::FoldShape);
if (arser.get<bool>("--fold_sparse_to_dense"))
@@ -317,6 +321,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FusePRelu);
if (arser.get<bool>("--fuse_gelu"))
options->enable(Algorithms::FuseGelu);
if (arser.get<bool>("--fuse_rsqrt"))
options->enable(Algorithms::FuseRsqrt);
if (arser.get<bool>("--fuse_transpose_with_mean"))
options->enable(Algorithms::FuseTransposeWithMean);
if (arser.get<bool>("--remove_duplicate_const"))
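Both new switches follow the existing pattern: each flag enables the matching Algorithms entry, which CircleOptimizer later turns into a pass (see CircleOptimizer.cpp below). From the command line this would presumably look like "circle2circle --fold_reshape --fuse_rsqrt input.circle output.circle" (positional input/output arguments assumed from the tool's usual usage).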
2 changes: 2 additions & 0 deletions compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -60,6 +60,8 @@ class Interpreter

void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);

size_t getOutputTensorSize(const luci::CircleOutput *output_node);

void interpret();

void attachObserver(ExecutionObserver *observer);
Expand Down
14 changes: 14 additions & 0 deletions compiler/luci-interpreter/src/Interpreter.cpp
@@ -125,6 +125,20 @@ void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *
tensor->readData(data, data_size);
}

size_t Interpreter::getOutputTensorSize(const luci::CircleOutput *output_node)
{
Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
if (tensor == nullptr)
{
const std::string &name = output_node->name();
throw std::runtime_error("Cannot find tensor size for output node named \"" + name + "\".");
}

size_t tensor_size = luci_interpreter::getDataTypeSize(tensor->element_type());
tensor_size *= tensor->shape().num_elements();
return tensor_size;
}

void Interpreter::interpret() { _runtime_module->execute(); }

void Interpreter::attachObserver(ExecutionObserver *observer)
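Taken together with the CircleInterpreter.cpp change above, the caller-side pattern looks roughly like the sketch below. This is not code from the commit; it assumes the luci_interpreter::Interpreter API shown in these hunks and a vector of output nodes obtained as in CircleInterpreter.cpp.

#include <vector>

#include <loco.h>
#include <luci/IR/CircleNodes.h>
#include <luci_interpreter/Interpreter.h>

// Size each output buffer from the interpreter's runtime tensor, then
// copy the data out. `output_nodes` is assumed to come from
// loco::output_nodes(module->graph()), as in CircleInterpreter.cpp.
void read_all_outputs(luci_interpreter::Interpreter &interpreter,
                      const std::vector<loco::Node *> &output_nodes)
{
  for (auto *node : output_nodes)
  {
    const auto *output_node = loco::must_cast<const luci::CircleOutput *>(node);
    std::vector<char> output_data(interpreter.getOutputTensorSize(output_node));
    interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
  }
}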
2 changes: 2 additions & 0 deletions compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -61,6 +61,7 @@ class CircleOptimizer final
FoldFullyConnected,
FoldDequantize,
FoldGather,
FoldReshape,
FoldShape,
FoldSparseToDense,
FoldSqueeze,
@@ -72,6 +73,7 @@
FuseActivationFunction,
FusePRelu,
FuseGelu,
FuseRsqrt,
ShuffleWeightTo16x1Float32,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
38 changes: 38 additions & 0 deletions compiler/luci/pass/include/luci/Pass/FoldReshapePass.h
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __LUCI_FOLD_RESHAPE_PASS_H__
#define __LUCI_FOLD_RESHAPE_PASS_H__

#include <logo/Pass.h>

namespace luci
{

/**
* @brief Class to fold Reshape to a constant tensor
*
*/
struct FoldReshapePass final : public logo::Pass
{
const char *name(void) const final { return "luci::FoldReshapePass"; }

bool run(loco::Graph *g) final;
};

} // namespace luci

#endif // __LUCI_FOLD_RESHAPE_PASS_H__
37 changes: 37 additions & 0 deletions compiler/luci/pass/include/luci/Pass/FuseRsqrtPass.h
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __LUCI_FUSE_RSQRT_PASS_H__
#define __LUCI_FUSE_RSQRT_PASS_H__

#include <logo/Pass.h>

namespace luci
{

/**
* @brief Class to fuse certain pattern of subgraph into CircleRsqrt
*/
struct FuseRsqrtPass final : public logo::Pass
{
const char *name(void) const final { return "luci::FuseRsqrtPass"; }

bool run(loco::Graph *g) final;
};

} // namespace luci

#endif // __LUCI_FUSE_RSQRT_PASS_H__
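The matching FuseRsqrtPass.cpp is not among the hunks shown on this page; judging from the brief above, the "certain pattern" is presumably a reciprocal-of-square-root subgraph (for example Div(1, Sqrt(x))) rewritten into a single CircleRsqrt node.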
10 changes: 10 additions & 0 deletions compiler/luci/pass/src/CircleOptimizer.cpp
@@ -26,6 +26,7 @@
#include "luci/Pass/FoldDequantizePass.h"
#include "luci/Pass/FoldFullyConnectedPass.h"
#include "luci/Pass/FoldGatherPass.h"
#include "luci/Pass/FoldReshapePass.h"
#include "luci/Pass/FoldShapePass.h"
#include "luci/Pass/FoldSparseToDensePass.h"
#include "luci/Pass/FoldSqueezePass.h"
@@ -46,6 +47,7 @@
#include "luci/Pass/FusePreActivationBatchNormPass.h"
#include "luci/Pass/FusePReluPass.h"
#include "luci/Pass/FuseGeluPass.h"
#include "luci/Pass/FuseRsqrtPass.h"
#include "luci/Pass/FuseSliceWithTConvPass.h"
#include "luci/Pass/FuseHorizontalFullyConnectedPass.h"
#include "luci/Pass/FuseTransposeWithMeanPass.h"
@@ -338,6 +340,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<FuseGeluPass>());
}
if (_options->query(Options::Algorithm::FuseRsqrt))
{
phase.emplace_back(std::make_unique<FuseRsqrtPass>());
}
if (_options->query(Options::Algorithm::FuseHorizontalFullyConnected))
{
phase.emplace_back(std::make_unique<FuseHorizontalFullyConnectedPass>());
@@ -374,6 +380,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
}
if (_options->query(Options::Algorithm::FoldReshape))
{
phase.emplace_back(std::make_unique<luci::FoldReshapePass>());
}
if (_options->query(Options::Algorithm::FoldShape))
{
phase.emplace_back(std::make_unique<luci::FoldShapePass>());
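For code that drives the optimizer directly rather than through the circle2circle CLI, enabling the two new algorithms would presumably look like the sketch below (not from the commit; it assumes the usual options()/optimize() entry points of luci::CircleOptimizer).

#include <loco.h>
#include <luci/CircleOptimizer.h>

// Enable the new fold/fuse algorithms, then run the optimizer; during
// optimize() the enabled entries are turned into FoldReshapePass and
// FuseRsqrtPass instances, as registered above.
void optimize_graph(loco::Graph *g)
{
  luci::CircleOptimizer optimizer;
  auto options = optimizer.options();
  options->enable(luci::CircleOptimizer::Options::Algorithm::FoldReshape);
  options->enable(luci::CircleOptimizer::Options::Algorithm::FuseRsqrt);
  optimizer.optimize(g);
}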
126 changes: 126 additions & 0 deletions compiler/luci/pass/src/FoldReshapePass.cpp
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "luci/Pass/FoldReshapePass.h"

#include <luci/IR/CircleNodes.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Service/Nodes/CircleConst.h>

namespace
{

/**
* Fold Reshape to const if it has const input
**/
bool fold_reshape(luci::CircleReshape *reshape)
{
// Check const input
auto const_input = dynamic_cast<luci::CircleConst *>(reshape->tensor());
if (not const_input)
return false;

// Check const shape
auto const_shape = dynamic_cast<luci::CircleConst *>(reshape->shape());
if (not const_shape)
return false;

// Check all dimensions are known
const auto input_rank = const_input->rank();
for (uint32_t i = 0; i < input_rank; i++)
{
if (not const_input->dim(i).known())
return false;
}

// Check the new shape is a rank-1 tensor with known dimension
const auto shape_rank = const_shape->rank();
if (shape_rank != 1)
return false;

if (not const_shape->dim(0).known())
return false;

std::vector<uint32_t> new_shape;
switch (const_shape->dtype())
{
case loco::DataType::S32:
for (uint32_t i = 0; i < const_shape->size<loco::DataType::S32>(); i++)
{
const auto val = const_shape->at<loco::DataType::S32>(i);
if (val < 0)
return false;

new_shape.push_back(static_cast<uint32_t>(val));
}
break;
// TODO Support S64
default:
return false;
}

if (auto input_qparam = const_input->quantparam())
{
// Only support per-tensor quantization
if (input_qparam->scale.size() != 1)
return false;

if (input_qparam->zerop.size() != 1)
return false;
}

auto new_const = luci::clone(const_input);
new_const->rank(new_shape.size());
for (uint32_t i = 0; i < new_shape.size(); i++)
{
new_const->dim(i).set(new_shape[i]);
}

new_const->shape_status(luci::ShapeStatus::VALID);

new_const->name(const_input->name() + "_reshaped");
luci::add_origin(
new_const, luci::composite_origin({luci::get_origin(reshape), luci::get_origin(const_input)}));

loco::replace(reshape).with(new_const);

return true;
}

} // namespace

namespace luci
{

/**
* Constant Folding for Reshape Op
**/
bool FoldReshapePass::run(loco::Graph *g)
{
bool changed = false;
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
{
if (fold_reshape(reshape))
changed = true;
}
}

return changed;
}

} // namespace luci
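To make the folded pattern concrete, here is a minimal, test-style sketch (not part of the commit; it assumes luci's usual graph-construction API and omits the graph-output wiring a real test needs so the pass traversal can reach the node).

#include <loco.h>
#include <luci/IR/CircleNodes.h>
#include <luci/Pass/FoldReshapePass.h>

// Build const(2x3, FLOAT32) --tensor--> Reshape <--shape-- const([6], S32),
// then run the pass; the Reshape is replaced by a rank-1 [6] constant.
void fold_reshape_example(void)
{
  loco::Graph g;

  auto data = g.nodes()->create<luci::CircleConst>();
  data->dtype(loco::DataType::FLOAT32);
  data->rank(2);
  data->dim(0).set(2);
  data->dim(1).set(3);
  data->size<loco::DataType::FLOAT32>(6); // allocate 2 * 3 elements

  auto shape = g.nodes()->create<luci::CircleConst>();
  shape->dtype(loco::DataType::S32);
  shape->rank(1);
  shape->dim(0).set(1);
  shape->size<loco::DataType::S32>(1);
  shape->at<loco::DataType::S32>(0) = 6; // requested shape: [6]

  auto reshape = g.nodes()->create<luci::CircleReshape>();
  reshape->tensor(data);
  reshape->shape(shape);
  // ... wire `reshape` to a CircleOutput / graph output here (omitted) ...

  luci::FoldReshapePass pass;
  pass.run(&g); // fold_reshape() replaces the Reshape with the new const
}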