From e8286880ab36168cb0a1319f76f3b76fb84d9921 Mon Sep 17 00:00:00 2001 From: novakovicdj Date: Tue, 11 Feb 2025 17:16:30 +0200 Subject: [PATCH 1/6] initial changes --- driver/tensor_driver.hpp | 46 +++-- src/batch_norm.cpp | 4 +- src/convolution.cpp | 4 +- src/graphapi/tensor.cpp | 20 +- .../miopen/conv/problem_description.hpp | 10 +- src/include/miopen/fusion.hpp | 4 +- src/include/miopen/graphapi/tensor.hpp | 28 +-- src/include/miopen/graphapi/util.hpp | 3 +- src/include/miopen/inline_vector.hpp | 105 +++++++++- .../miopen/problem_description_base.hpp | 30 +++ src/include/miopen/reduce/solvers.hpp | 8 +- src/include/miopen/rnn.hpp | 2 +- .../rnn/algorithms/default_algo_utils.hpp | 29 +-- .../rnn/algorithms/dynamic_algo_utils.hpp | 8 +- src/include/miopen/rnn/tmp_buffer_utils.hpp | 5 +- src/include/miopen/rnn_util.hpp | 5 +- src/include/miopen/seq_tensor.hpp | 31 +-- src/include/miopen/tensor.hpp | 58 +++--- src/include/miopen/tensor_layout.hpp | 9 +- src/include/miopen/tensor_ops.hpp | 4 +- src/include/miopen/util.hpp | 3 +- .../miopen/utility/transposing_solver.hpp | 18 +- src/ocl/ctcocl.cpp | 2 +- src/ocl/dropoutocl.cpp | 24 +-- src/ocl/fusionopbiasbnactivocl.cpp | 4 +- src/ocl/rnnocl.cpp | 64 +++--- src/ocl/utilocl.cpp | 3 +- src/pooling.cpp | 6 +- src/pooling/problem_description.cpp | 11 ++ src/reduce/problem_description.cpp | 5 +- src/rnn.cpp | 24 +-- src/rnn/Solutions/Base/bw_data_modular.cpp | 28 +-- src/rnn/Solutions/Base/bw_weights_modular.cpp | 5 +- src/rnn/Solutions/Base/fw_data_modular.cpp | 43 ++-- src/rnn/rnn_util.cpp | 51 ++--- src/seq_tensor.cpp | 87 ++++++-- src/solver/mha/mha_solver_backward.cpp | 5 +- src/solver/mha/mha_solver_forward.cpp | 2 +- src/solver/reduce/forward_argmax.cpp | 2 +- src/solver/reduce/forward_argmin.cpp | 2 +- src/solver/reduce/forward_max.cpp | 2 +- src/solver/reduce/forward_min.cpp | 2 +- src/solver/tensorOp/tensor_op_helpers.hpp | 7 +- src/tensor.cpp | 135 +++++++------ test/conv_common.hpp | 28 +-- 
test/cpu_reduce_util.hpp | 69 ++++++- test/dropout_util.hpp | 32 +-- test/gpu_reference_kernel.cpp | 26 +-- test/gru_common.hpp | 16 +- test/gtest/bad_fusion_plan.cpp | 4 +- test/gtest/binary_tensor_ops.cpp | 28 +-- test/gtest/conv3d_test_case.hpp | 4 +- test/gtest/conv_tensor_gen.hpp | 2 +- test/gtest/graphapi_capi_mha_common.hpp | 5 +- .../graphapi_conv_bias_res_add_activ_fwd.cpp | 3 +- test/gtest/graphapi_mha_bwd.cpp | 10 +- test/gtest/graphapi_mha_cpp_common.hpp | 5 +- test/gtest/graphapi_mha_fwd.cpp | 8 +- .../graphapi_operationgraph_descriptor.cpp | 8 +- test/gtest/graphapi_tensor.cpp | 8 +- test/gtest/group_conv.hpp | 4 +- test/gtest/inline_vector_basic_ops.cpp | 186 ++++++++++++++++++ test/gtest/kthvalue.hpp | 3 +- test/gtest/layout_transpose.cpp | 22 +-- test/gtest/nonpack_conv3d_fwd.hpp | 6 +- test/gtest/reduceextreme.hpp | 2 +- test/gtest/ternary_tensor_ops.cpp | 93 ++++----- test/gtest/unary_tensor_ops.cpp | 11 +- test/gtest/unit_TensorDescriptor.hpp | 19 +- test/gtest/unit_conv_solver.cpp | 8 +- test/gtest/unit_conv_solver.hpp | 8 +- test/lstm_common.hpp | 16 +- test/pooling_common.hpp | 3 +- test/rnn_seq_api.hpp | 2 +- test/rnn_util.hpp | 2 +- test/rnn_vanilla_common.hpp | 16 +- test/tensor_holder.hpp | 54 ++++- test/tensor_reorder.cpp | 10 +- test/tensor_transform.cpp | 34 ++-- 79 files changed, 1138 insertions(+), 565 deletions(-) diff --git a/driver/tensor_driver.hpp b/driver/tensor_driver.hpp index c353a6ee11..722e51fdc5 100644 --- a/driver/tensor_driver.hpp +++ b/driver/tensor_driver.hpp @@ -55,10 +55,10 @@ inline miopenTensorLayout_t StringToLayoutType(std::string layout) } } -inline void LengthReorder(std::vector& lens, const std::initializer_list& indices) +inline void LengthReorder(miopen::InlineVector& lens, + const std::initializer_list& indices) { - std::vector out_lens; - out_lens.reserve(indices.size()); + miopen::InlineVector out_lens(indices.size()); for(int index : indices) { assert(0 <= index && index < lens.size()); @@ -78,7 +78,7 
@@ inline std::size_t GetTensorVectorLength(const miopenTensorDescriptor_t& tensor) return vectorLength; } -inline std::vector GetTensorLengths(const miopenTensorDescriptor_t& tensor) +inline miopen::InlineVector GetTensorLengths(const miopenTensorDescriptor_t& tensor) { int n; int c; @@ -92,22 +92,21 @@ inline std::vector GetTensorLengths(const miopenTensorDescriptor_t& tensor) if(size == 5) { miopenGet5dTensorDescriptorLengths(tensor, &n, &c, &d, &h, &w); - return std::vector({n, c, d, h, w}); + return miopen::InlineVector({n, c, d, h, w}); } else if(size == 4) { miopenGet4dTensorDescriptorLengths(tensor, &n, &c, &h, &w); - return std::vector({n, c, h, w}); + return miopen::InlineVector({n, c, h, w}); } - std::vector tensor_len; - tensor_len.resize(miopen::deref(tensor).GetNumDims()); + miopen::InlineVector tensor_len(miopen::deref(tensor).GetNumDims()); miopenGetTensorDescriptor(tensor, nullptr, tensor_len.data(), nullptr); return tensor_len; } -inline std::vector GetTensorStrides(const miopenTensorDescriptor_t& tensor) +inline miopen::InlineVector GetTensorStrides(const miopenTensorDescriptor_t& tensor) { int nstride; int cstride; @@ -122,16 +121,15 @@ inline std::vector GetTensorStrides(const miopenTensorDescriptor_t& tensor) { miopenGet5dTensorDescriptorStrides( tensor, &nstride, &cstride, &dstride, &hstride, &wstride); - return std::vector({nstride, cstride, dstride, hstride, wstride}); + return miopen::InlineVector({nstride, cstride, dstride, hstride, wstride}); } else if(size == 4) { miopenGet4dTensorDescriptorStrides(tensor, &nstride, &cstride, &hstride, &wstride); - return std::vector({nstride, cstride, hstride, wstride}); + return miopen::InlineVector({nstride, cstride, hstride, wstride}); } - std::vector tensor_strides; - tensor_strides.resize(miopen::deref(tensor).GetNumDims()); + miopen::InlineVector tensor_strides(miopen::deref(tensor).GetNumDims()); miopenGetTensorDescriptor(tensor, nullptr, nullptr, tensor_strides.data()); @@ -139,14 +137,14 @@ 
inline std::vector GetTensorStrides(const miopenTensorDescriptor_t& tensor) } inline int SetTensor4d(miopenTensorDescriptor_t t, - std::vector& len, + miopen::InlineVector& len, miopenDataType_t data_type = miopenFloat) { return miopenSet4dTensorDescriptor(t, data_type, UNPACK_VEC4(len)); } inline int SetTensorNdVector(miopenTensorDescriptor_t t, - std::vector& len, + miopen::InlineVector& len, miopenTensorLayout_t layout, miopenDataType_t data_type = miopenFloat) { @@ -167,37 +165,37 @@ inline int SetTensorNdVector(miopenTensorDescriptor_t t, } inline int SetTensorNd(miopenTensorDescriptor_t t, - std::vector& len, + miopen::InlineVector& len, miopenDataType_t data_type = miopenFloat) { return miopenSetTensorDescriptor(t, data_type, len.size(), len.data(), nullptr); } inline int SetTensorNd(miopenTensorDescriptor_t t, - std::vector& len, + miopen::InlineVector& len, miopenDataType_t data_type = miopenFloat) { return miopenSetTensorDescriptorV2(t, data_type, len.size(), len.data(), nullptr); } inline int SetTensorNd(miopenTensorDescriptor_t t, - std::vector& len, - std::vector& strides, + miopen::InlineVector& len, + miopen::InlineVector& strides, miopenDataType_t data_type = miopenFloat) { return miopenSetTensorDescriptor(t, data_type, len.size(), len.data(), strides.data()); } inline int SetTensorNd(miopenTensorDescriptor_t t, - std::vector& len, - std::vector& strides, + miopen::InlineVector& len, + miopen::InlineVector& strides, miopenDataType_t data_type = miopenFloat) { return miopenSetTensorDescriptorV2(t, data_type, len.size(), len.data(), strides.data()); } inline int SetTensorNd(miopenTensorDescriptor_t t, - std::vector& len, + miopen::InlineVector& len, const std::string& layout, miopenDataType_t data_type = miopenFloat) { @@ -223,8 +221,8 @@ inline int SetTensorNd(miopenTensorDescriptor_t t, return SetTensorNd(t, len, data_type); } - std::vector strides2; - std::vector len2(len.cbegin(), len.cend()); + miopen::InlineVector strides2; + 
miopen::InlineVector len2(len.cbegin(), len.cend()); miopen::tensor_layout_to_strides(len2, len_layout, layout, strides2); return SetTensorNd(t, len2, strides2, data_type); } diff --git a/src/batch_norm.cpp b/src/batch_norm.cpp index 2c5486f307..e88a20bbc3 100644 --- a/src/batch_norm.cpp +++ b/src/batch_norm.cpp @@ -44,7 +44,7 @@ void DeriveBNTensorDescriptor(TensorDescriptor& derivedBnDesc, { auto lengths = xDesc.GetLengths(); - std::vector newlens(lengths.size()); + miopen::InlineVector newlens(lengths.size()); newlens[1] = lengths[1]; if(bn_mode == miopenBNSpatial) { @@ -66,7 +66,7 @@ void DeriveBNTensorDescriptor(TensorDescriptor& derivedBnDesc, TensorDescriptor BuildReshaped4DTensorDescriptor(const miopen::TensorDescriptor& tDesc) { - std::vector dims(tDesc.GetLengths()); + miopen::InlineVector dims(tDesc.GetLengths()); auto dataType = tDesc.GetType(); auto layout = tDesc.GetLayout_t(); diff --git a/src/convolution.cpp b/src/convolution.cpp index 51d94ece7b..81918ec0be 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -291,7 +291,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& } std::size_t out_c = 0; - std::vector out_lens(spatial_dim + 2); + miopen::InlineVector out_lens(spatial_dim + 2); auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim); @@ -356,7 +356,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& out_lens[1] = out_c; const std::string default_layout = tensor_layout_get_default(xDesc.GetNumDims()); - std::vector out_strides; + miopen::InlineVector out_strides; tensor_layout_to_strides( out_lens, default_layout, yLayout, xDesc.GetVectorLength(), out_strides); return {(xDesc.GetType() == miopenInt8 diff --git a/src/graphapi/tensor.cpp b/src/graphapi/tensor.cpp index 4b9367a7b1..56d2b1e8e8 100644 --- a/src/graphapi/tensor.cpp +++ b/src/graphapi/tensor.cpp @@ -40,7 +40,7 @@ TensorBuilder& TensorBuilder::setDataType(miopenDataType_t dataType) & return 
*this; } -TensorBuilder& TensorBuilder::setDim(const std::vector& dimensions) & +TensorBuilder& TensorBuilder::setDim(const miopen::InlineVector& dimensions) & { if(dimensions.empty() || miopen::any_of(dimensions, [](std::size_t val) { return val <= 0; })) { @@ -52,7 +52,7 @@ TensorBuilder& TensorBuilder::setDim(const std::vector& dimensions) return *this; } -TensorBuilder& TensorBuilder::setDim(std::vector&& dimensions) & +TensorBuilder& TensorBuilder::setDim(miopen::InlineVector&& dimensions) & { if(dimensions.empty() || miopen::any_of(dimensions, [](std::size_t val) { return val <= 0; })) { @@ -64,7 +64,7 @@ TensorBuilder& TensorBuilder::setDim(std::vector&& dimensions) & return *this; } -TensorBuilder& TensorBuilder::setStride(const std::vector& strides) & +TensorBuilder& TensorBuilder::setStride(const miopen::InlineVector& strides) & { if(strides.empty() || miopen::any_of(strides, [](std::size_t val) { return val <= 0; })) { @@ -76,7 +76,7 @@ TensorBuilder& TensorBuilder::setStride(const std::vector& strides) return *this; } -TensorBuilder& TensorBuilder::setStride(std::vector&& strides) & +TensorBuilder& TensorBuilder::setStride(miopen::InlineVector&& strides) & { if(strides.empty() || miopen::any_of(strides, [](std::size_t val) { return val <= 0; })) { @@ -162,9 +162,9 @@ void BackendTensorDescriptor::setAttribute(miopenBackendAttributeName_t attribut case MIOPEN_ATTR_TENSOR_DIMENSIONS: if(attributeType == MIOPEN_TYPE_INT64 && elementCount > 0) { - mBuilder.setDim( - std::vector(static_cast(arrayOfElements), - static_cast(arrayOfElements) + elementCount)); + mBuilder.setDim(miopen::InlineVector( + static_cast(arrayOfElements), + static_cast(arrayOfElements) + elementCount)); return; } else @@ -175,9 +175,9 @@ void BackendTensorDescriptor::setAttribute(miopenBackendAttributeName_t attribut case MIOPEN_ATTR_TENSOR_STRIDES: if(attributeType == MIOPEN_TYPE_INT64 && elementCount > 0) { - mBuilder.setStride( - std::vector(static_cast(arrayOfElements), - 
static_cast(arrayOfElements) + elementCount)); + mBuilder.setStride(miopen::InlineVector( + static_cast(arrayOfElements), + static_cast(arrayOfElements) + elementCount)); return; } else diff --git a/src/include/miopen/conv/problem_description.hpp b/src/include/miopen/conv/problem_description.hpp index 3148713fc8..b191f14179 100644 --- a/src/include/miopen/conv/problem_description.hpp +++ b/src/include/miopen/conv/problem_description.hpp @@ -71,31 +71,31 @@ constexpr TElement GetW3(unsigned spatial_dims, const std::vector& dat return std::get<2>(GetDHW(spatial_dims, data)); } template -constexpr auto GetCHWN(const std::vector& data) +constexpr auto GetCHWN(const miopen::InlineVector& data) { return miopen::tien<4>(data, 1); } template -constexpr TElement GetNofCHWN(const std::vector& data) +constexpr TElement GetNofCHWN(const miopen::InlineVector& data) { return std::get<3>(GetCHWN(data)); } template -constexpr TElement GetCofCHWN(const std::vector& data) +constexpr TElement GetCofCHWN(const miopen::InlineVector& data) { return std::get<0>(GetCHWN(data)); } template -constexpr TElement GetHofCHWN(const std::vector& data) +constexpr TElement GetHofCHWN(const miopen::InlineVector& data) { return std::get<1>(GetCHWN(data)); } template -constexpr TElement GetWofCHWN(const std::vector& data) +constexpr TElement GetWofCHWN(const miopen::InlineVector& data) { return std::get<2>(GetCHWN(data)); } diff --git a/src/include/miopen/fusion.hpp b/src/include/miopen/fusion.hpp index 4636cf0c43..32caffefef 100644 --- a/src/include/miopen/fusion.hpp +++ b/src/include/miopen/fusion.hpp @@ -171,7 +171,7 @@ struct MIOPEN_INTERNALS_EXPORT BatchNormFwdTrainFusionOpDescriptor : FusionOpDes miopenFusionOp_t kind() const override { return miopenFusionOpBatchNormFwdTrain; }; std::vector GetLocalWGSz(); std::vector GetGlobalWGSz(); - void calcBNParams(std::vector in_lens, + void calcBNParams(miopen::InlineVector in_lens, int& variant, size_t& in_cstride, size_t& in_nstride, @@ -204,7 +204,7 
@@ struct MIOPEN_INTERNALS_EXPORT BatchNormBwdTrainFusionOpDescriptor : FusionOpDes miopenFusionOp_t kind() const override { return miopenFusionOpBatchNormBwdTrain; }; std::vector GetLocalWGSz(); std::vector GetGlobalWGSz(); - void calcBNParams(std::vector in_lens, + void calcBNParams(miopen::InlineVector in_lens, int& variant, size_t& in_cstride, size_t& in_nstride, diff --git a/src/include/miopen/graphapi/tensor.hpp b/src/include/miopen/graphapi/tensor.hpp index 4425a4f138..e4d6cc7075 100644 --- a/src/include/miopen/graphapi/tensor.hpp +++ b/src/include/miopen/graphapi/tensor.hpp @@ -60,16 +60,16 @@ class Tensor : public TensorDescriptor { } Tensor(miopenDataType_t dataType, - const std::vector& dimensions, - const std::vector& strides, + const miopen::InlineVector& dimensions, + const miopen::InlineVector& strides, int64_t id, bool isVirtual) : TensorDescriptor(dataType, dimensions, strides), mId(id), mVirtual(isVirtual) { } Tensor(miopenDataType_t dataType, - std::vector&& dimensions, - std::vector&& strides, + miopen::InlineVector&& dimensions, + miopen::InlineVector&& strides, int64_t id, bool isVirtual) noexcept : TensorDescriptor(dataType, std::move(dimensions), std::move(strides)), @@ -94,8 +94,8 @@ class Tensor : public TensorDescriptor class MIOPEN_INTERNALS_EXPORT TensorBuilder { private: - std::vector mDimensions; - std::vector mStrides; + miopen::InlineVector mDimensions; + miopen::InlineVector mStrides; int64_t mId = 0; miopenDataType_t mDataType = miopenFloat; bool mVirtual = false; @@ -106,10 +106,10 @@ class MIOPEN_INTERNALS_EXPORT TensorBuilder public: TensorBuilder& setDataType(miopenDataType_t dataType) &; - TensorBuilder& setDim(const std::vector& dimensions) &; - TensorBuilder& setDim(std::vector&& dimensions) &; - TensorBuilder& setStride(const std::vector& strides) &; - TensorBuilder& setStride(std::vector&& strides) &; + TensorBuilder& setDim(const miopen::InlineVector& dimensions) &; + TensorBuilder& setDim(miopen::InlineVector&& 
dimensions) &; + TensorBuilder& setStride(const miopen::InlineVector& strides) &; + TensorBuilder& setStride(miopen::InlineVector&& strides) &; TensorBuilder& setId(int64_t id) &; TensorBuilder& setVirtual(bool isVirtual) &; @@ -117,19 +117,19 @@ class MIOPEN_INTERNALS_EXPORT TensorBuilder { return std::move(setDataType(dataType)); } - TensorBuilder&& setDim(const std::vector& dimensions) && + TensorBuilder&& setDim(const miopen::InlineVector& dimensions) && { return std::move(setDim(dimensions)); } - TensorBuilder&& setDim(std::vector&& dimensions) && + TensorBuilder&& setDim(miopen::InlineVector&& dimensions) && { return std::move(setDim(std::move(dimensions))); } - TensorBuilder&& setStride(const std::vector& strides) && + TensorBuilder&& setStride(const miopen::InlineVector& strides) && { return std::move(setStride(strides)); } - TensorBuilder&& setStride(std::vector&& strides) && + TensorBuilder&& setStride(miopen::InlineVector&& strides) && { return std::move(setStride(std::move(strides))); } diff --git a/src/include/miopen/graphapi/util.hpp b/src/include/miopen/graphapi/util.hpp index 303cefaeca..6bd96b587f 100644 --- a/src/include/miopen/graphapi/util.hpp +++ b/src/include/miopen/graphapi/util.hpp @@ -165,7 +165,8 @@ struct PatternGraphGenerator inline Tensor* makeDummyTensor(std::string_view name) { - return mAlloc.allocate(makeTensor(name, miopenFloat, std::vector({1}))); + return mAlloc.allocate( + makeTensor(name, miopenFloat, miopen::InlineVector({1}))); } private: diff --git a/src/include/miopen/inline_vector.hpp b/src/include/miopen/inline_vector.hpp index e8b123b032..49d853adf8 100644 --- a/src/include/miopen/inline_vector.hpp +++ b/src/include/miopen/inline_vector.hpp @@ -30,6 +30,8 @@ #include #include +#include + namespace miopen { template @@ -56,7 +58,35 @@ class InlineVector InlineVector(const InlineVector& inline_vec) = default; InlineVector(InlineVector&& inline_vec) noexcept = default; - InlineVector(std::initializer_list data) : 
real_size(data.size())
+    InlineVector(size_type sz) : real_size(sz)
+    {
+        if(real_size > N)
+        {
+            MIOPEN_THROW("Input data size is bigger than InlineVector's capacity");
+        }
+        // Match std::vector(n) semantics: value-initialize the n elements
+        // (callers migrated from std::vector may rely on zeroed contents).
+        std::fill(storage.begin(), storage.begin() + real_size, T{});
+    }
+
+    InlineVector(const size_type& cnt, const T& val) : real_size(cnt)
+    {
+        if(real_size > N)
+        {
+            MIOPEN_THROW("Input data size is bigger than InlineVector's capacity");
+        }
+
+        std::fill(storage.begin(), storage.begin() + cnt, val);
+    }
+
+    InlineVector(size_type&& cnt, T&& val) : real_size(cnt)
+    {
+        if(real_size > N)
+        {
+            MIOPEN_THROW("Input data size is bigger than InlineVector's capacity");
+        }
+
+        std::fill(storage.begin(), storage.begin() + cnt, val);
+    }
+
+    InlineVector(const std::initializer_list& data) : real_size(data.size())
     {
         if(real_size > N)
         {
@@ -66,21 +96,44 @@ class InlineVector
         std::copy(data.begin(), data.end(), storage.begin());
     }
 
-    template
-    InlineVector(InputIterator first, InputIterator last) : real_size(std::distance(first, last))
+    InlineVector(std::initializer_list&& data) : real_size(data.size())
     {
         if(real_size > N)
         {
             MIOPEN_THROW("Input data size is bigger than InlineVector's capacity");
         }
-        std::copy(first, last, storage.begin());
+        std::copy(data.begin(), data.end(), storage.begin());
+    }
+
+    template
+    InlineVector(InputIterator first, InputIterator last)
+    {
+        if constexpr(std::is_integral::value)
+        {
+            // NOTE(review): the previous `InlineVector(size_t(first), T(last));`
+            // constructed a temporary and discarded it, leaving *this empty.
+            // Initialize this object in place instead.
+            real_size = static_cast<size_type>(first);
+            if(real_size > N)
+            {
+                MIOPEN_THROW("Input data size is bigger than InlineVector's capacity");
+            }
+            std::fill(storage.begin(), storage.begin() + real_size, static_cast<T>(last));
+        }
+        else
+        {
+            real_size = std::distance(first, last);
+            if(real_size > N)
+            {
+                MIOPEN_THROW("Input data size is bigger than InlineVector's capacity");
+            }
+            std::copy(first, last, storage.begin());
+        }
     }
 
     // Copy/move operator
     InlineVector& operator=(const InlineVector& inline_vec) = default;
     InlineVector& operator=(InlineVector&& inline_vec) noexcept = default;
 
+    // Compare operators. Only the first real_size elements participate;
+    // comparing the whole fixed-size storage would let unused tail slots
+    // (and equal-storage vectors of different logical size) corrupt the result.
+    bool operator==(const InlineVector& riv) const
+    {
+        return real_size == riv.real_size &&
+               std::equal(storage.begin(), storage.begin() + real_size, riv.storage.begin());
+    }
+    bool operator!=(const InlineVector& riv) const { return !(*this == riv); }
+    bool operator<(const InlineVector& riv) const
+    {
+        return std::lexicographical_compare(storage.begin(),
+                                            storage.begin() + real_size,
+                                            riv.storage.begin(),
+                                            riv.storage.begin() + riv.real_size);
+    }
+    bool operator>(const InlineVector& riv) const { return riv < *this; }
+
     // Iterators
     iterator begin() noexcept { return iterator(data()); }
 
@@ -202,6 +255,36 @@ class InlineVector
         real_size = n;
     }
 
+    // Insert
+    // Insert 'value' before 'pos'
+    iterator insert(iterator pos, const T& value)
+    {
+        if(real_size == N)
+        {
+            MIOPEN_THROW("InlineVector already full");
+        }
+        const auto idx = std::distance(begin(), pos);
+        if(idx < 0 || static_cast<size_type>(idx) > real_size)
+        {
+            MIOPEN_THROW("Cannot insert data at this position");
+        }
+        real_size += 1;
+        // Shift the tail one slot right to open a gap at idx.
+        for(size_type i = real_size - 1; i > static_cast<size_type>(idx); i--)
+        {
+            storage[i] = storage[i - 1];
+        }
+        storage[idx] = value;
+        return iterator(data() + idx);
+    }
+
     // Add element to the back
     void push_back(const T& e)
     {
@@ -246,6 +329,20 @@ class InlineVector
     size_type real_size = 0;
 };
 
+template <typename T, std::size_t N>
+std::ostream& operator<<(std::ostream& os, const InlineVector<T, N>& iv)
+{
+    os << "{";
+    for(std::size_t i = 0; i < iv.size(); i++)
+    {
+        if(i != 0)
+            os << ", ";
+        os << iv[i];
+    }
+    os << "}";
+    return os;
+}
+
 } // namespace miopen #endif diff --git a/src/include/miopen/problem_description_base.hpp b/src/include/miopen/problem_description_base.hpp index a7914a8b5a..f109b2fce7 100644 --- a/src/include/miopen/problem_description_base.hpp +++ b/src/include/miopen/problem_description_base.hpp @@ -53,6 +53,36 @@ inline std::string GetDataTypeName(miopenDataType_t data_type) return "Unknown(" + std::to_string(data_type) + ")"; } +template +constexpr TElement GetN5(unsigned spatial_dims, const miopen::InlineVector& data) +{ + return std::get<0>(GetNCDHW(spatial_dims, data)); +} + +template +constexpr TElement GetC5(unsigned spatial_dims, const miopen::InlineVector& data) +{ + return std::get<1>(GetNCDHW(spatial_dims, data)); +} + +template +constexpr TElement GetD5(unsigned spatial_dims, const miopen::InlineVector& data)
+{ + return std::get<2>(GetNCDHW(spatial_dims, data)); +} + +template +constexpr TElement GetH5(unsigned spatial_dims, const miopen::InlineVector& data) +{ + return std::get<3>(GetNCDHW(spatial_dims, data)); +} + +template +constexpr TElement GetW5(unsigned spatial_dims, const miopen::InlineVector& data) +{ + return std::get<4>(GetNCDHW(spatial_dims, data)); +} + template constexpr TElement GetN5(unsigned spatial_dims, const std::vector& data) { diff --git a/src/include/miopen/reduce/solvers.hpp b/src/include/miopen/reduce/solvers.hpp index 7a8ea83ba8..caf339b3a5 100644 --- a/src/include/miopen/reduce/solvers.hpp +++ b/src/include/miopen/reduce/solvers.hpp @@ -43,7 +43,7 @@ using ReduceCalculationSolver = struct ArgmaxForward final : ReduceExtremeSolver { const std::string& SolverDbId() const override { return GetSolverDbId(); } - size_t XGridSize(std::vector indicedims) const; + size_t XGridSize(miopen::InlineVector indicedims) const; bool OverMaxGridSize(const ExecutionContext& context, const miopen::reduce::ProblemDescriptionExtreme& problem) const; @@ -57,7 +57,7 @@ struct ArgmaxForward final : ReduceExtremeSolver struct ArgminForward final : ReduceExtremeSolver { const std::string& SolverDbId() const override { return GetSolverDbId(); } - size_t XGridSize(std::vector indicedims) const; + size_t XGridSize(miopen::InlineVector indicedims) const; bool OverMaxGridSize(const ExecutionContext& context, const miopen::reduce::ProblemDescriptionExtreme& problem) const; @@ -71,7 +71,7 @@ struct ArgminForward final : ReduceExtremeSolver struct MaxForward final : ReduceExtremeSolver { const std::string& SolverDbId() const override { return GetSolverDbId(); } - size_t XGridSize(std::vector ydims) const; + size_t XGridSize(miopen::InlineVector ydims) const; bool OverMaxGridSize(const ExecutionContext& context, const miopen::reduce::ProblemDescriptionExtreme& problem) const; @@ -85,7 +85,7 @@ struct MaxForward final : ReduceExtremeSolver struct MinForward final : 
ReduceExtremeSolver { const std::string& SolverDbId() const override { return GetSolverDbId(); } - size_t XGridSize(std::vector ydims) const; + size_t XGridSize(miopen::InlineVector ydims) const; bool OverMaxGridSize(const ExecutionContext& context, const miopen::reduce::ProblemDescriptionExtreme& problem) const; diff --git a/src/include/miopen/rnn.hpp b/src/include/miopen/rnn.hpp index 49e31910ee..1264799eef 100644 --- a/src/include/miopen/rnn.hpp +++ b/src/include/miopen/rnn.hpp @@ -110,7 +110,7 @@ struct MIOPEN_INTERNALS_EXPORT RNNDescriptor : miopenRNNDescriptor size_t paramsOffsetCalculation(const TensorDescriptor& xDesc, int layer, int paramID) const; - std::vector + miopen::InlineVector pTensorLengthsCalculation(const TensorDescriptor& xDesc, int layer, int paramID) const; static SeqTensorDescriptor makeSeqTensorDescriptor(miopenDataType_t t, diff --git a/src/include/miopen/rnn/algorithms/default_algo_utils.hpp b/src/include/miopen/rnn/algorithms/default_algo_utils.hpp index f0503d6654..5821dcdeb1 100644 --- a/src/include/miopen/rnn/algorithms/default_algo_utils.hpp +++ b/src/include/miopen/rnn/algorithms/default_algo_utils.hpp @@ -275,9 +275,9 @@ class RNNModuleAlgoBase // 2 dims batch, vec inline miopen::TensorDescriptor BuildHxCxDesc2D(size_t batch_size) const { - const std::vector hx_size{batch_size, hiddenHxCxInfo.getHiddenSize()}; - const std::vector hx_stride{hiddenHxCxInfo.getStrides()[1], - hiddenHxCxInfo.getStrides()[2]}; + const miopen::InlineVector hx_size{batch_size, hiddenHxCxInfo.getHiddenSize()}; + const miopen::InlineVector hx_stride{hiddenHxCxInfo.getStrides()[1], + hiddenHxCxInfo.getStrides()[2]}; return miopen::TensorDescriptor{rnnDesc.dataType, hx_size, hx_stride}; } @@ -285,23 +285,28 @@ class RNNModuleAlgoBase // 3 dims layer, batch, vec inline miopen::TensorDescriptor BuildHxCxDesc3D(size_t layer_size, size_t batch_size) const { - const std::vector hx_accum_size{ + const miopen::InlineVector hx_accum_size{ layer_size, batch_size, 
hiddenHxCxInfo.getHiddenSize()}; return miopen::TensorDescriptor{ - rnnDesc.dataType, hx_accum_size, hiddenHxCxInfo.getStrides()}; + rnnDesc.dataType, + hx_accum_size, + miopen::InlineVector(hiddenHxCxInfo.getStrides().begin(), + hiddenHxCxInfo.getStrides().end())}; } // 3 dims layer, batch, vec inline miopen::TensorDescriptor BuildTempDhtDesc3D(size_t layer_size, size_t batch_size) const { - const std::vector dy_dhy_accum_size{ + const miopen::InlineVector dy_dhy_accum_size{ layer_size, batch_size, hiddenHxCxInfo.getHiddenSize()}; - const auto ws_dy_stride = [](const auto& ws_4dim_strides) -> std::vector { + const auto ws_dy_stride = + [](const auto& ws_4dim_strides) -> miopen::InlineVector { // convert 4dim stride to 3 dim without direction // TODO change hiddenBufferDesc - return std::vector{ws_4dim_strides[0], ws_4dim_strides[1], ws_4dim_strides[3]}; + return miopen::InlineVector{ + ws_4dim_strides[0], ws_4dim_strides[1], ws_4dim_strides[3]}; }(workspaceInfo.getHiddenStateStride()); return miopen::TensorDescriptor{rnnDesc.dataType, dy_dhy_accum_size, ws_dy_stride}; @@ -310,14 +315,16 @@ class RNNModuleAlgoBase // 3 dims layer, batch, vec inline miopen::TensorDescriptor BuildWeiBiasDesc2D() const { - const std::vector bias_size = [](const auto& wei_4dim_size) -> std::vector { + const miopen::InlineVector bias_size = + [](const auto& wei_4dim_size) -> miopen::InlineVector { // wei_4dim_size{layer, dir, gate, vec} return {1, wei_4dim_size[1] * wei_4dim_size[2] * wei_4dim_size[3]}; }(weightsLayout.getBiasSize()); - const auto bias_stride = [](const auto& wei_4dim_strides) -> std::vector { + const auto bias_stride = + [](const auto& wei_4dim_strides) -> miopen::InlineVector { // convert 4dim stride to 2 dim without direction - return std::vector{wei_4dim_strides[0], wei_4dim_strides[3]}; + return miopen::InlineVector{wei_4dim_strides[0], wei_4dim_strides[3]}; }(weightsLayout.getBiasStride()); return miopen::TensorDescriptor{rnnDesc.dataType, bias_size, bias_stride}; 
diff --git a/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp b/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp index 2771724294..695f95e398 100644 --- a/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp +++ b/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp @@ -33,7 +33,7 @@ namespace miopen { namespace rnn_base { -inline std::vector roundedDynamicLengths(const SeqTensorDescriptor& desc) +inline miopen::InlineVector roundedDynamicLengths(const SeqTensorDescriptor& desc) { auto src_lens = desc.GetLengths(); src_lens[1] = [](size_t v) { @@ -141,7 +141,7 @@ class RNNModuleAlgoDynamic : public RNNForwardDataModularAlgo static auto getTempBuffersSize(const RNNDescriptor& rnnD, const SeqTensorDescriptor& xDesc) { auto y_desc = [](const RNNDescriptor& rnnD, const SeqTensorDescriptor& xDesc) { - std::vector y_lenghts{xDesc.GetLengths()}; + miopen::InlineVector y_lenghts{xDesc.GetLengths()}; y_lenghts[2] = rnnD.hsize * (rnnD.dirMode == miopenRNNbidirection ? 2 : 1); return SeqTensorDescriptor{xDesc.GetType(), y_lenghts}; }(rnnD, xDesc); @@ -253,7 +253,7 @@ class RNNBackwardModuleAlgoDynamic : public RNNBackwardDataModularAlgo static auto getTempBuffersSize(const RNNDescriptor& rnnD, const SeqTensorDescriptor& xDesc) { auto y_desc = [](const RNNDescriptor& rnnD, const SeqTensorDescriptor& xDesc) { - std::vector y_lenghts{xDesc.GetLengths()}; + miopen::InlineVector y_lenghts{xDesc.GetLengths()}; y_lenghts[2] = rnnD.hsize * (rnnD.dirMode == miopenRNNbidirection ? 
2 : 1); return SeqTensorDescriptor{xDesc.GetType(), y_lenghts}; }(rnnD, xDesc); @@ -368,7 +368,7 @@ class RNNBackwardWeiModuleAlgoDynamic : public RNNBackwardWeightsModularAlgo static auto getTempBuffersSize(const RNNDescriptor& rnnD, const SeqTensorDescriptor& xDesc) { auto y_desc = [](const RNNDescriptor& rnnD, const SeqTensorDescriptor& xDesc) { - std::vector y_lenghts{xDesc.GetLengths()}; + miopen::InlineVector y_lenghts{xDesc.GetLengths()}; y_lenghts[2] = rnnD.hsize * (rnnD.dirMode == miopenRNNbidirection ? 2 : 1); return SeqTensorDescriptor{xDesc.GetType(), y_lenghts}; }(rnnD, xDesc); diff --git a/src/include/miopen/rnn/tmp_buffer_utils.hpp b/src/include/miopen/rnn/tmp_buffer_utils.hpp index 04ab1720a6..549a6cf9e5 100644 --- a/src/include/miopen/rnn/tmp_buffer_utils.hpp +++ b/src/include/miopen/rnn/tmp_buffer_utils.hpp @@ -901,8 +901,9 @@ class IOBufferDescriptor static IOBufferDescriptor build(const SeqTensorDescriptor& xyDesc) { //{batch, seq_cnt, vector} - auto lens = xyDesc.GetLengths(); - auto strides = xyDesc.GetPaddedStrides(); + auto lens = std::vector(xyDesc.GetLengths().begin(), xyDesc.GetLengths().end()); + auto strides = + std::vector(xyDesc.GetPaddedStrides().begin(), xyDesc.GetPaddedStrides().end()); //{ combine(batch, seq_cnt), vector} std::vector packed_lens{xyDesc.GetTotalSequenceLen(), lens[2]}; diff --git a/src/include/miopen/rnn_util.hpp b/src/include/miopen/rnn_util.hpp index 72e84b20ff..e265509ff6 100644 --- a/src/include/miopen/rnn_util.hpp +++ b/src/include/miopen/rnn_util.hpp @@ -351,13 +351,14 @@ inline size_t ReductionWorkspaceSize(const Handle& handle, size_t bias_total_cnt = hsize * bidirect_mp * nHiddenTensorsPerLayer; - const std::vector ws_bias_strides{ + const miopen::InlineVector ws_bias_strides{ batchLenSum * workspaceScale * hsize * bidirect_mp, hy_stride, 1}; const miopen::TensorDescriptor ws_desc{ rnn_data_t, {1, batchLenSum, bias_total_cnt}, ws_bias_strides}; - const std::vector dw_bias_strides{bias_total_cnt, 
bias_total_cnt, 1}; + const miopen::InlineVector dw_bias_strides{ + bias_total_cnt, bias_total_cnt, 1}; const miopen::TensorDescriptor dw_desc{rnn_data_t, {1, 1, bias_total_cnt}, dw_bias_strides}; reduction_ws = red_add.GetWorkspaceSize(handle, ws_desc, dw_desc) + // WA CK bug diff --git a/src/include/miopen/seq_tensor.hpp b/src/include/miopen/seq_tensor.hpp index 57fbd4bcb4..64c3e57d07 100644 --- a/src/include/miopen/seq_tensor.hpp +++ b/src/include/miopen/seq_tensor.hpp @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -55,13 +56,13 @@ struct MIOPEN_INTERNALS_EXPORT SeqTensorDescriptor : miopenSeqTensorDescriptor // code for better dependency tracking SeqTensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in); - SeqTensorDescriptor(miopenDataType_t t, const std::vector& lens_in); + SeqTensorDescriptor(miopenDataType_t t, const miopen::InlineVector& lens_in); SeqTensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in); - SeqTensorDescriptor(miopenDataType_t t, const std::vector& lens_in); + SeqTensorDescriptor(miopenDataType_t t, const miopen::InlineVector& lens_in); SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, bool with_padded_seq_layout); SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, @@ -69,19 +70,19 @@ struct MIOPEN_INTERNALS_EXPORT SeqTensorDescriptor : miopenSeqTensorDescriptor bool with_padded_seq_layout); SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, bool with_padded_seq_layout); SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, const std::vector& seq_len, const std::vector& padding_marker_in, bool use_seq_len, bool with_padded_seq_layout); SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, 
- const std::vector& lens_in, + const miopen::InlineVector& lens_in, const std::vector& seq_len, const std::vector& padding_marker_in, bool use_seq_len, @@ -89,28 +90,28 @@ struct MIOPEN_INTERNALS_EXPORT SeqTensorDescriptor : miopenSeqTensorDescriptor SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, - const std::vector& padding_in, + const miopen::InlineVector& lens_in, + const miopen::InlineVector& padding_in, bool with_padded_seq_layout); SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, const std::vector& seq_len, - const std::vector& padding_in, + const miopen::InlineVector& padding_in, const std::vector& padding_marker_in, bool use_seq_len, bool with_padded_seq_layout); const std::vector& GetLayoutVector() const; - const std::vector& GetLengths() const; - const std::vector& GetPadding() const; + const miopen::InlineVector& GetLengths() const; + const miopen::InlineVector& GetPadding() const; const std::vector& GetSequenceLengthsVector() const; const std::vector& GetPaddingMarkerHolder() const; // Get vector of strides only for padded tensor, // if IsPaddedSeqLayout()==false function returns an empty vector - std::vector GetPaddedStrides() const; + miopen::InlineVector GetPaddedStrides() const; bool IsPacked() const; bool IsPaddedSeqLayout() const; @@ -165,8 +166,8 @@ struct MIOPEN_INTERNALS_EXPORT SeqTensorDescriptor : miopenSeqTensorDescriptor std::vector dim_order; - std::vector lens; // length of each dimension - std::vector padds; // padding for each dimension + miopen::InlineVector lens; // length of each dimension + miopen::InlineVector padds; // padding for each dimension std::vector sequence_len; // sequence length of each sample, sequence_len.size()=lens[0] diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index 48a05a5a98..852c6b6dc1 100644 --- a/src/include/miopen/tensor.hpp +++ 
b/src/include/miopen/tensor.hpp @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -152,45 +153,45 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor // code for better dependency tracking TensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in); - TensorDescriptor(miopenDataType_t t, const std::vector& lens_in); + TensorDescriptor(miopenDataType_t t, const miopen::InlineVector& lens_in); TensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in); - TensorDescriptor(miopenDataType_t t, const std::vector& lens_in); - TensorDescriptor(miopenDataType_t t, std::vector&& lens_in); + TensorDescriptor(miopenDataType_t t, const miopen::InlineVector& lens_in); + TensorDescriptor(miopenDataType_t t, miopen::InlineVector&& lens_in); TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - const std::vector& lens_in); + const miopen::InlineVector& lens_in); TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, const std::initializer_list& lens_in); TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - const std::vector& lens_in); + const miopen::InlineVector& lens_in); TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - std::vector&& lens_in); + miopen::InlineVector&& lens_in); TensorDescriptor(miopenDataType_t t, - const std::vector& lens_in, - const std::vector& strides_in); + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in); TensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in, const std::initializer_list& strides_in); TensorDescriptor(miopenDataType_t t, - const std::vector& lens_in, - const std::vector& strides_in); + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in); TensorDescriptor(miopenDataType_t t, - std::vector&& lens_in, - std::vector&& strides_in); + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in); 
TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - const std::vector& lens_in, - const std::vector& strides_in); + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in); TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - std::vector&& lens_in, - std::vector&& strides_in); + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in); // Use only for external API static TensorDescriptor MakeDescriptor(miopenDataType_t t, const int* plens, int size); @@ -210,8 +211,8 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor bool IsVectorized() const; - const std::vector& GetLengths() const; - const std::vector& GetStrides() const; + const miopen::InlineVector& GetLengths() const; + const miopen::InlineVector& GetStrides() const; unsigned GetNumDims() const; miopenDataType_t GetType() const; @@ -261,8 +262,9 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor // Layout could be NCHW, NHWC, NCDHW, NDHWC, NCHWc, ... 
bool IsPossibleLayout4D5D(const std::string& layout) const; - static std::vector find_permutation(const std::vector& lens, - const std::vector& strides); + static miopen::InlineVector + find_permutation(const miopen::InlineVector& lens, + const miopen::InlineVector& strides); // storage_layout must be NCHW or NCHWc for NCHWc, CHWN or CHWNc for CHWNc, NCHW for other 4D // layouts, NCDHW for 5D layouts @@ -277,20 +279,20 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor private: TensorDescriptor(miopenDataType_t t, const std::optional& layout_in, - const std::vector& lens_in, - const std::vector& strides_in, + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in, bool use_strides); TensorDescriptor(miopenDataType_t t, const std::optional& layout_in, - std::vector&& lens_in, - std::vector&& strides_in, + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in, bool use_strides); void CheckArgsAndInit(bool use_strides); - std::vector lens; - std::vector strides; + miopen::InlineVector lens; + miopen::InlineVector strides; bool packed; std::size_t vector_length = 1; @@ -307,7 +309,7 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor mutable std::string cached_layout_str; // For GetLayout - mutable std::vector cached_permutation; + mutable miopen::InlineVector cached_permutation; // For AllLengthsFitIntoInt() mutable std::optional cached_lengths_fit_into_int; @@ -316,7 +318,7 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor }; template -constexpr auto GetNCDHW(unsigned spatial_dims, const std::vector& data) +constexpr auto GetNCDHW(unsigned spatial_dims, const miopen::InlineVector& data) { if(spatial_dims == 3) return miopen::tien<5>(data, 1); diff --git a/src/include/miopen/tensor_layout.hpp b/src/include/miopen/tensor_layout.hpp index f5659d7dd3..a47ccb45c4 100644 --- a/src/include/miopen/tensor_layout.hpp +++ b/src/include/miopen/tensor_layout.hpp @@ 
-32,14 +32,15 @@ #include #include #include +#include namespace miopen { template -void tensor_layout_to_strides(const std::vector& len, +void tensor_layout_to_strides(const miopen::InlineVector& len, const std::string& len_layout, const std::string& layout, - std::vector& strides) + miopen::InlineVector& strides) { // Bind the layout and the dimension lengths together into a map. std::map dim_to_len; @@ -73,11 +74,11 @@ void tensor_layout_to_strides(const std::vector& len, /// /// \todo Generalize with non-vectorized version, 90% of code is the same. template -void tensor_layout_to_strides(const std::vector& len, +void tensor_layout_to_strides(const miopen::InlineVector& len, const std::string& len_layout, const std::string& layout, const std::size_t vector_size, - std::vector& strides) + miopen::InlineVector& strides) { const std::string base_layout = layout.substr(0, len.size()); // Bind the layout and the dimension lengths together into a map. diff --git a/src/include/miopen/tensor_ops.hpp b/src/include/miopen/tensor_ops.hpp index 25d838598b..bd16ebda89 100644 --- a/src/include/miopen/tensor_ops.hpp +++ b/src/include/miopen/tensor_ops.hpp @@ -100,8 +100,8 @@ GetConsistentFlattenedTensorDescriptors(const TDescriptors&... 
real_descriptor_p } // start flattening tensors - std::array, NTensor> array_of_flat_lengths; - std::array, NTensor> array_of_flat_strides; + std::array, NTensor> array_of_flat_lengths; + std::array, NTensor> array_of_flat_strides; auto i = non1_length_strides.begin(); std::size_t flat_len = boost::get<0>(*i); diff --git a/src/include/miopen/util.hpp b/src/include/miopen/util.hpp index ac5503d2b0..1163d503f1 100644 --- a/src/include/miopen/util.hpp +++ b/src/include/miopen/util.hpp @@ -28,6 +28,7 @@ #include #include +#include #include @@ -96,7 +97,7 @@ MIOPEN_INTERNALS_EXPORT float transpose_CNHW2NCHW(const Handle& handle, miopenDataType_t type); MIOPEN_INTERNALS_EXPORT float transpose_NCHW2Vec(const Handle& handle, - const std::vector& lens, + const miopen::InlineVector& lens, ConstData_t in, Data_t out, std::size_t vec_size, diff --git a/src/include/miopen/utility/transposing_solver.hpp b/src/include/miopen/utility/transposing_solver.hpp index e7ca3bdc7a..cb687ee771 100644 --- a/src/include/miopen/utility/transposing_solver.hpp +++ b/src/include/miopen/utility/transposing_solver.hpp @@ -49,6 +49,22 @@ inline static std::array GetNCDHW(const std::vector& va return {cast(n), cast(c), cast(d), cast(h), cast(w)}; } +template +inline static std::array GetNCDHW(const miopen::InlineVector& values) +{ + const auto cast = [](auto v) { return static_cast(v); }; + std::size_t n = 1, c = 1, d = 1, h = 1, w = 1; + + switch(values.size()) + { + case 5: std::tie(n, c, d, h, w) = tien<5>(values); break; + case 4: std::tie(n, c, h, w) = tien<4>(values); break; + default: MIOPEN_THROW(miopenStatusBadParm); + } + + return {cast(n), cast(c), cast(d), cast(h), cast(w)}; +} + struct TransposeProblem { TensorDescriptor input; @@ -291,7 +307,7 @@ struct ProblemTensorTransposeDescriptor inline TensorDescriptor Transpose(const TensorDescriptor& in) const { const auto labels = tensor_layout_get_default(in.GetNumDims()); - auto derived_strides = std::vector{}; + miopen::InlineVector 
derived_strides{}; tensor_layout_to_strides( in.GetLengths(), labels, SyncLayoutDims(labels.c_str(), to), derived_strides); return {in.GetType(), in.GetLengths(), derived_strides}; diff --git a/src/ocl/ctcocl.cpp b/src/ocl/ctcocl.cpp index 7bf322a377..f629c0a2d1 100644 --- a/src/ocl/ctcocl.cpp +++ b/src/ocl/ctcocl.cpp @@ -193,7 +193,7 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, float time = 0.; if(apply_softmax_layer) { - std::vector sfm_size(4, 1); + miopen::InlineVector sfm_size(4, 1); sfm_size[0] = max_time_step * batch_size; sfm_size[1] = class_sz; auto sfm_desc = miopen::TensorDescriptor(probsDesc.GetType(), sfm_size); diff --git a/src/ocl/dropoutocl.cpp b/src/ocl/dropoutocl.cpp index 30f8e3d9e0..d63b45e80b 100644 --- a/src/ocl/dropoutocl.cpp +++ b/src/ocl/dropoutocl.cpp @@ -45,14 +45,14 @@ namespace miopen { template -inline void SquashPairedTensor(const std::vector x_len, - const std::vector x_str, - const std::vector y_len, - const std::vector y_str, - std::vector& in_len, - std::vector& in_str, - std::vector& out_len, - std::vector& out_str) +inline void SquashPairedTensor(const miopen::InlineVector x_len, + const miopen::InlineVector x_str, + const miopen::InlineVector y_len, + const miopen::InlineVector y_str, + miopen::InlineVector& in_len, + miopen::InlineVector& in_str, + miopen::InlineVector& out_len, + miopen::InlineVector& out_str) { if(!std::equal(x_len.begin(), x_len.end(), y_len.begin())) { @@ -240,10 +240,10 @@ void DropoutDescriptor::Dropout(const Handle& handle, } // support up to 5D tensor - std::vector in_len(5, 1); - std::vector in_str(5, 1); - std::vector out_len(5, 1); - std::vector out_str(5, 1); + miopen::InlineVector in_len{1, 1, 1, 1, 1}; + miopen::InlineVector in_str{1, 1, 1, 1, 1}; + miopen::InlineVector out_len{1, 1, 1, 1, 1}; + miopen::InlineVector out_str{1, 1, 1, 1, 1}; SquashPairedTensor(xDesc.GetLengths(), xDesc.GetStrides(), diff --git a/src/ocl/fusionopbiasbnactivocl.cpp b/src/ocl/fusionopbiasbnactivocl.cpp 
index aeb121369a..e83cca2c87 100644 --- a/src/ocl/fusionopbiasbnactivocl.cpp +++ b/src/ocl/fusionopbiasbnactivocl.cpp @@ -114,7 +114,7 @@ BatchNormInferenceFusionOpDescriptor::GetGlobalWGSz(const Handle& /*handle*/, /// END BN inference ------------------------------------------ // BN Bwd Training start -void BatchNormBwdTrainFusionOpDescriptor::calcBNParams(std::vector in_lens, +void BatchNormBwdTrainFusionOpDescriptor::calcBNParams(miopen::InlineVector in_lens, int& variant, size_t& in_cstride, size_t& in_nstride, @@ -246,7 +246,7 @@ std::vector BatchNormBwdTrainFusionOpDescriptor::GetGlobalWGSz() /// BATCH NORMALIZATION training forward start ================ -void BatchNormFwdTrainFusionOpDescriptor::calcBNParams(std::vector in_lens, +void BatchNormFwdTrainFusionOpDescriptor::calcBNParams(miopen::InlineVector in_lens, int& variant, size_t& in_cstride, size_t& in_nstride, diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index 0bee23908a..87e3e3d724 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -173,8 +173,9 @@ miopenStatus_t ReducAddBias(const miopen::Handle& handle, int m = 1, n = ws_desc.GetLengths()[2], k = ws_desc.GetLengths()[1]; int lda = k, ldb = ws_desc.GetStrides()[1], ldc = n; - const miopen::TensorDescriptor red_matrix{ - red_type, std::vector{1, 1, k}, std::vector{k, k, 1}}; + const miopen::TensorDescriptor red_matrix{red_type, + miopen::InlineVector{1, 1, k}, + miopen::InlineVector{k, k, 1}}; SetTensor(handle, red_matrix, red_workSpace, &alpha1); @@ -581,15 +582,15 @@ void RNNDescriptor::RNNForwardMS(const Handle& handle, float alpha1 = 1; const auto bias_stride = WeiBuf.bias_stride(); - const auto bias_desc = - miopen::TensorDescriptor(wDesc.GetType(), - std::vector{1, 1, WeiBuf.bias_vector_mul_gate()}, - std::vector{bias_stride, bias_stride, 1}); + const auto bias_desc = miopen::TensorDescriptor( + wDesc.GetType(), + miopen::InlineVector{1, 1, WeiBuf.bias_vector_mul_gate()}, + miopen::InlineVector{bias_stride, bias_stride, 1}); 
const auto hidden_interim_desc = miopen::TensorDescriptor( wDesc.GetType(), - std::vector{1, RBuff.batches, WeiBuf.bias_vector_mul_gate()}, - std::vector{ + miopen::InlineVector{1, RBuff.batches, WeiBuf.bias_vector_mul_gate()}, + miopen::InlineVector{ RBuff.batches * RBuff.gemm_write_stride(), RBuff.gemm_write_stride(), 1}); const auto RB_layer_out_off = RBuff.layer_offset(layer); @@ -778,17 +779,18 @@ void RNNDescriptor::RNNForwardMS(const Handle& handle, auto hcy_layer_offset = get_HxBuff_offset(layer_id); - const std::vector hcy_src_stride{ + const miopen::InlineVector hcy_src_stride{ RBuff.layer_stride(), static_cast(RBuff.gemm_write_stride()), 1}; - const std::vector hcy_dst_stride{ + const miopen::InlineVector hcy_dst_stride{ static_cast(hidden_size * max_batch), static_cast(hidden_size), 1}; if(in_n.at(0) < max_batch) { float beta = 0.; - const std::vector zero_set_size{1, - static_cast(max_batch - in_n.at(0)), - static_cast(hidden_size)}; + const miopen::InlineVector zero_set_size{ + 1, + static_cast(max_batch - in_n.at(0)), + static_cast(hidden_size)}; auto set_batch_offset = in_n.at(0) * hidden_size; auto set_desc = @@ -817,7 +819,7 @@ void RNNDescriptor::RNNForwardMS(const Handle& handle, auto src_batch_offset = RBuff.layer_offset(layer_id) + RBuff.gemm_write_relative_offset(batch_id_abs); - const std::vector hcy_copy_size{ + const miopen::InlineVector hcy_copy_size{ 1, static_cast(copy_batch), static_cast(hidden_size)}; auto src_desc = @@ -1128,13 +1130,13 @@ void RNNDescriptor::RNNForwardMS(const Handle& handle, // output tensor copy { - const std::vector y_copy_size{ + const miopen::InlineVector y_copy_size{ 1, static_cast(total_batch_size), static_cast(out_vec)}; - const std::vector y_src_stride{ + const miopen::InlineVector y_src_stride{ RBuff.layer_stride(), static_cast(RBuff.gemm_write_stride()), 1}; - const std::vector y_dst_stride{ + const miopen::InlineVector y_dst_stride{ static_cast(out_vec * total_batch_size), static_cast(out_vec), 1}; auto 
src_desc = miopen::TensorDescriptor(wDesc.GetType(), y_copy_size, y_src_stride); @@ -1421,8 +1423,9 @@ void RNNDescriptor::RNNForwardInferencePacked(const Handle& handle, float alpha0, alpha1, beta_t; float alpha = 1, beta = 0; - std::vector sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), x_size(3, 1), - x_stride(3, 1), y_size(3, 1), y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1); + miopen::InlineVector sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), + x_size(3, 1), x_stride(3, 1), y_size(3, 1), y_stride(3, 1), hx_size(3, 1), + hx_stride(3, 1); miopen::TensorDescriptor sp_desc, w_desc, x_desc, y_desc, hx_desc; sp_size[2] = workSpaceSize / GetTypeSize(wDesc.GetType()); @@ -2857,8 +2860,8 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors( float alpha0, alpha1, beta_t; float alpha = 1, beta = 0; - std::vector sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), x_size(3, 1), - x_stride(3, 1), y_size(3, 1), y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1); + miopen::InlineVector sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), + x_size(3, 1), x_stride(3, 1), y_size(3, 1), y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1); miopen::TensorDescriptor sp_desc, w_desc, x_desc, y_desc, hx_desc; sp_size[2] = reserveSpaceSize / GetTypeSize(wDesc.GetType()); @@ -3005,7 +3008,7 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors( if(use_dropout) { - std::vector drop_size(2), drop_in_str(2, 1), drop_out_str(2, 1); + miopen::InlineVector drop_size(2), drop_in_str(2, 1), drop_out_str(2, 1); drop_size[0] = batch_n; drop_size[1] = hy_h * bi; drop_in_str[0] = hy_stride; @@ -4367,8 +4370,8 @@ void RNNDescriptor::RNNBackwardDataPackedTensors( float alpha0, alpha1, beta_t; float alpha = 1, beta = 0; - std::vector sp_size(3, 1), sp_stride(3, 1), x_size(3, 1), x_stride(3, 1), y_size(3, 1), - y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1); + miopen::InlineVector sp_size(3, 1), sp_stride(3, 1), x_size(3, 1), x_stride(3, 1), + y_size(3, 1), 
y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1); miopen::TensorDescriptor sp_desc, x_desc, y_desc, hx_desc; sp_size[2] = workSpaceSize / GetTypeSize(rnn_data_type); @@ -4519,7 +4522,7 @@ void RNNDescriptor::RNNBackwardDataPackedTensors( if(use_dropout) { - std::vector drop_size(2), drop_in_str(2, 1); + miopen::InlineVector drop_size(2), drop_in_str(2, 1); drop_size[0] = batch_n; drop_size[1] = hy_h * bi; drop_in_str[0] = hy_stride; @@ -5707,7 +5710,7 @@ void RNNDescriptor::RNNBackwardDataPackedTensors( // dinput if(inputMode == miopenRNNskip) { - const std::vector dx_size{1, batch_n, hy_h}; + const miopen::InlineVector dx_size{1, batch_n, hy_h}; x_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, x_stride); sp_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, sp_stride); @@ -6041,7 +6044,8 @@ void RNNDescriptor::RNNBackwardWeightsPackedTensors( float alpha0, alpha1, beta_t = 0; - std::vector sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1); + miopen::InlineVector sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), + w_stride(3, 1); miopen::TensorDescriptor sp_desc, w_desc; sp_stride[0] = batch_n * hy_stride; @@ -6166,14 +6170,14 @@ void RNNDescriptor::RNNBackwardWeightsPackedTensors( if(biasMode != 0u) { - const std::vector ws_bias_strides{ + const miopen::InlineVector ws_bias_strides{ static_cast(batch_n) * hy_stride, static_cast(hy_stride), 1}; const miopen::TensorDescriptor ws_desc{ rnn_data_t, {1, static_cast(batch_n), static_cast(wei_stride)}, ws_bias_strides}; - const std::vector dw_bias_strides{ + const miopen::InlineVector dw_bias_strides{ static_cast(wei_stride), static_cast(wei_stride), 1}; const miopen::TensorDescriptor dw_desc{ rnn_data_t, {1, 1, static_cast(wei_stride)}, dw_bias_strides}; @@ -6262,7 +6266,7 @@ void RNNDescriptor::RNNBackwardWeightsPackedTensors( else { // second dw bias equal to the first, so just copy reduction result - const std::vector dw_bias_strides{wei_stride, wei_stride, 1}; + const miopen::InlineVector 
dw_bias_strides{wei_stride, wei_stride, 1}; const miopen::TensorDescriptor dw_desc{ rnn_data_t, {1, 1, wei_stride}, dw_bias_strides}; diff --git a/src/ocl/utilocl.cpp b/src/ocl/utilocl.cpp index d5dd4661e5..0cfd9e4614 100644 --- a/src/ocl/utilocl.cpp +++ b/src/ocl/utilocl.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -1030,7 +1031,7 @@ float transpose_CNHW2NCHW(const Handle& handle, // NCHW (or NCDHW) to NCHW_C4 (or NCDHW_C4) float transpose_NCHW2Vec(const Handle& handle, - const std::vector& lens, + const miopen::InlineVector& lens, ConstData_t in, Data_t out, std::size_t vec_size, diff --git a/src/pooling.cpp b/src/pooling.cpp index a65cb3c0ab..388497ac3d 100644 --- a/src/pooling.cpp +++ b/src/pooling.cpp @@ -215,12 +215,12 @@ void PoolingDescriptor::GetForwardOutputDimNd(const TensorDescriptor& xDesc, TensorDescriptor PoolingDescriptor::GetForwardOutputTensor(const TensorDescriptor& xDesc) const { - std::vector out_dim(xDesc.GetNumDims()); + miopen::InlineVector out_dim(xDesc.GetNumDims()); GetForwardOutputDimNd(xDesc, xDesc.GetNumDims(), out_dim.data()); const std::string default_layout = tensor_layout_get_default(xDesc.GetNumDims()); const std::string in_layout = xDesc.GetLayout(default_layout); - std::vector out_strides; + miopen::InlineVector out_strides; tensor_layout_to_strides(out_dim, default_layout, in_layout, out_strides); return {xDesc.GetType(), out_dim, out_strides}; @@ -239,7 +239,7 @@ std::size_t PoolingDescriptor::GetWorkSpaceSize(const TensorDescriptor& yDesc) c if(yDesc.GetLayout(labels) != labels) { const auto e_size = get_data_size(yDesc.GetType()); - auto transposed_strides = std::vector{}; + auto transposed_strides = miopen::InlineVector{}; const auto in_layout = yDesc.GetLayout(labels); tensor_layout_to_strides(yDesc.GetLengths(), labels, in_layout, transposed_strides); const auto transposed_y = diff --git a/src/pooling/problem_description.cpp b/src/pooling/problem_description.cpp index 8e171a4ac0..386081b9ca 
100644 --- a/src/pooling/problem_description.cpp +++ b/src/pooling/problem_description.cpp @@ -47,6 +47,17 @@ std::string get_vect_config(const std::vector& v) return str; } +template +std::string get_vect_config(const miopen::InlineVector& v) +{ + std::string str; + for(auto itr = v.begin(); itr < v.end(); itr++) + { + str += (std::to_string(*itr) + (itr == v.end() - 1 ? "" : "x")); + } + return str; +} + } // namespace NetworkConfig ProblemDescription::MakeNetworkConfig() const diff --git a/src/reduce/problem_description.cpp b/src/reduce/problem_description.cpp index 0dbac15451..4336fc1b50 100644 --- a/src/reduce/problem_description.cpp +++ b/src/reduce/problem_description.cpp @@ -26,6 +26,7 @@ #include #include +#include #include @@ -36,7 +37,7 @@ namespace reduce { NetworkConfig ProblemDescriptionExtreme::MakeNetworkConfig() const { auto xlength = xDesc.GetLengths(); - std::vector outputlength; + InlineVector outputlength; if((reduceExtremeOp == MIOPEN_REDUCE_EXTREME_MIN) || (reduceExtremeOp == MIOPEN_REDUCE_EXTREME_MAX)) outputlength = yDesc.GetLengths(); @@ -73,7 +74,7 @@ NetworkConfig ProblemDescriptionExtreme::MakeNetworkConfig() const NetworkConfig ProblemDescriptionCalculation::MakeNetworkConfig() const { auto xlength = xDesc.GetLengths(); - std::vector outputlength; + InlineVector outputlength; outputlength = yDesc.GetLengths(); auto size = xlength[dim]; diff --git a/src/rnn.cpp b/src/rnn.cpp index db50e8a3e2..b92e6abe76 100644 --- a/src/rnn.cpp +++ b/src/rnn.cpp @@ -216,9 +216,9 @@ size_t RNNDescriptor::paramsOffsetCalculation(const TensorDescriptor& xDesc, return layerJump; } -std::vector RNNDescriptor::pTensorLengthsCalculation(const TensorDescriptor& xDesc, - const int layer, - const int paramID) const +miopen::InlineVector RNNDescriptor::pTensorLengthsCalculation(const TensorDescriptor& xDesc, + const int layer, + const int paramID) const { auto inputVectorLen = xDesc.GetLengths()[1]; if(inputMode == miopenRNNskip) @@ -226,7 +226,7 @@ std::vector 
RNNDescriptor::pTensorLengthsCalculation(const TensorDescriptor inputVectorLen = 0; } - std::vector tdim(2, 0); + miopen::InlineVector tdim(2, 0); if(dirMode != 0u) { @@ -749,7 +749,7 @@ void RNNDescriptor::GetParamsDescriptor(const Handle& /* handle */, // Create weight super tensor descriptor int bi = (dirMode == miopenRNNbidirection) ? 2 : 1; - std::vector weight_lens(2, 0); + miopen::InlineVector weight_lens(2, 0); weight_lens[0] = inputVectorLen + ((nLayers - 1) * (bi + 1) + 1) * hsize; weight_lens[1] = bi * hsize * nHiddenTensorsPerLayer; if(biasMode == miopenRNNwithBias) @@ -905,11 +905,12 @@ void RNNDescriptor::SetLayerParam(const Handle& handle, auto poffset = paramsOffsetCalculation(xDesc, layer, paramID); // 2. Calculate the strides for the matrix - std::vector pstride(2, 1); + miopen::InlineVector pstride(2, 1); pstride[1] = paramDesc.GetLengths()[0]; - std::vector intLens(paramDesc.GetLengths().begin(), paramDesc.GetLengths().end()); + miopen::InlineVector intLens(paramDesc.GetLengths().begin(), + paramDesc.GetLengths().end()); // 3. Construct descriptor to access into w auto paramSrc = miopen::TensorDescriptor(dataType, intLens, pstride); @@ -958,9 +959,10 @@ void RNNDescriptor::SetLayerBias(const Handle& handle, auto boffset = biasOffsetCalculation(xDesc, layer, biasID) + poffset; // 2. Calculate the strides for the matrix - std::vector bstride(1, 1); + miopen::InlineVector bstride(1, 1); - std::vector intLens(biasDesc.GetLengths().begin(), biasDesc.GetLengths().end()); + miopen::InlineVector intLens(biasDesc.GetLengths().begin(), + biasDesc.GetLengths().end()); // 3. 
Construct descriptor to access into w auto biasSrc = miopen::TensorDescriptor(dataType, intLens, bstride); @@ -1127,7 +1129,7 @@ SeqTensorDescriptor RNNDescriptor::makeSeqTensorDescriptor(miopenDataType_t t, const int* lensPerSeq, const void* padding_marker_ptr) { - const std::vector lens = {batchSize, maxSeqLength, vectorSize}; + const miopen::InlineVector lens = {batchSize, maxSeqLength, vectorSize}; const auto [dim_order, padded_sequences] = convertRNNBaseLayout(layout); @@ -1190,7 +1192,7 @@ RNNDescriptor::makeSeqTensorDescriptor(c_array_view{}); } - const std::vector lens = {max_batch, seq_len, vec_size}; + const miopen::InlineVector lens = {max_batch, seq_len, vec_size}; const auto [dim_order, padded_sequences] = convertRNNBaseLayout(layout); diff --git a/src/rnn/Solutions/Base/bw_data_modular.cpp b/src/rnn/Solutions/Base/bw_data_modular.cpp index 95c1a2d239..aaf96294ee 100644 --- a/src/rnn/Solutions/Base/bw_data_modular.cpp +++ b/src/rnn/Solutions/Base/bw_data_modular.cpp @@ -318,14 +318,14 @@ void RNNBackwardDataModularAlgo::PropDy(const Handle& handle, size_t direc_scale = rnnD.dirMode == miopenRNNbidirection ? 
2 : 1; - const auto dy_normalized_size = - std::vector{1, dy_raw_size[0], direc_scale, dy_raw_size[1] / direc_scale}; + const auto dy_normalized_size = miopen::InlineVector{ + 1, dy_raw_size[0], direc_scale, dy_raw_size[1] / direc_scale}; const auto dy_normalized_stride = - std::vector{dy_normalized_size[1] * dy_raw_stride[0] /*unused*/, - dy_raw_stride[0], - dy_normalized_size[3] * dy_raw_stride[1], - dy_raw_stride[1]}; + miopen::InlineVector{dy_normalized_size[1] * dy_raw_stride[0] /*unused*/, + dy_raw_stride[0], + dy_normalized_size[3] * dy_raw_stride[1], + dy_raw_stride[1]}; auto dy_desc = miopen::TensorDescriptor(rnnD.dataType, dy_normalized_size, dy_normalized_stride); @@ -333,12 +333,13 @@ void RNNBackwardDataModularAlgo::PropDy(const Handle& handle, return std::make_tuple(dy_desc, dy); }(yInfo, rnnDesc, dy); - const std::vector ws_dst_strides = [](const auto& full_stride_ref) { - return std::vector(full_stride_ref.begin(), full_stride_ref.end()); + const miopen::InlineVector ws_dst_strides = [](const auto& full_stride_ref) { + return miopen::InlineVector(full_stride_ref.begin(), full_stride_ref.end()); }(workspaceInfo.getHiddenStateStride()); - const std::vector ws_dst_size = [](const auto& full_size_ref) { - std::vector ws_ht_layer_size(full_size_ref.begin(), full_size_ref.end()); + const miopen::InlineVector ws_dst_size = [](const auto& full_size_ref) { + miopen::InlineVector ws_ht_layer_size(full_size_ref.begin(), + full_size_ref.end()); ws_ht_layer_size[0] = 1; @@ -464,7 +465,7 @@ void RNNBackwardDataModularAlgo::PropHiddenDy(const Handle& handle, auto h_state_sizes = reservLayout.hStateSizes; // TODO 3 dim vec, add direction as dim - std::vector drop_size(2), drop_in_str(2, 1); + miopen::InlineVector drop_size(2), drop_in_str(2, 1); drop_size[0] = h_state_sizes[1]; // batch_n; drop_size[1] = h_state_sizes[2] * h_state_sizes[3]; // hy_h* direction_mult; @@ -561,7 +562,10 @@ void RNNBackwardDataModularAlgo::PropDx(const Handle& handle, const auto& 
ht_size = buf_info.getFullSeqMajorSize(); // batch, vec_elements - return miopen::TensorDescriptor{dType, {batch_size, ht_size[1]}, ht_stride}; + return miopen::TensorDescriptor{ + dType, + {batch_size, ht_size[1]}, + miopen::InlineVector(ht_stride.begin(), ht_stride.end())}; }(rnnDesc.dataType, xInfo, gemm_batch_size); RnnBaseFunctions::BWD_GEMM_Hidden_Prop(handle, diff --git a/src/rnn/Solutions/Base/bw_weights_modular.cpp b/src/rnn/Solutions/Base/bw_weights_modular.cpp index 32ed4b6923..f13c021c7d 100644 --- a/src/rnn/Solutions/Base/bw_weights_modular.cpp +++ b/src/rnn/Solutions/Base/bw_weights_modular.cpp @@ -177,7 +177,10 @@ void RNNBackwardWeightsModularAlgo::PhisXInputWeights(const Handle& handle, const auto& ht_size = buf_info.getFullSeqMajorSize(); // batch, vec_elements - return miopen::TensorDescriptor{dType, {batch_size, ht_size[1]}, ht_stride}; + return miopen::TensorDescriptor{ + dType, + {batch_size, ht_size[1]}, + miopen::InlineVector(ht_stride.begin(), ht_stride.end())}; }(rnnDesc.dataType, xInfo, gemm_batch_size); RnnBaseFunctions::BWWei_GEMM(handle, diff --git a/src/rnn/Solutions/Base/fw_data_modular.cpp b/src/rnn/Solutions/Base/fw_data_modular.cpp index ee86a4a5a9..85faf63bbc 100644 --- a/src/rnn/Solutions/Base/fw_data_modular.cpp +++ b/src/rnn/Solutions/Base/fw_data_modular.cpp @@ -86,7 +86,10 @@ void RNNForwardDataModularAlgo::PropX(const Handle& handle, const auto& ht_size = buf_info.getFullSeqMajorSize(); // batch, vec_elements - return miopen::TensorDescriptor{dType, {batch_size, ht_size[1]}, ht_stride}; + return miopen::TensorDescriptor{ + dType, + {batch_size, ht_size[1]}, + miopen::InlineVector(ht_stride.begin(), ht_stride.end())}; }(rnnDesc.dataType, xInfo, gemm_batch_size); if(rnnDesc.inputMode == miopenRNNskip) @@ -204,18 +207,19 @@ void RNNForwardDataModularAlgo::AddBias(const Handle& handle, // single layer, single direction const auto bias_desc = miopen::TensorDescriptor( rnnDesc.dataType, - std::vector{1, 1, 
weightsLayout.getBiasSize()[2] * weightsLayout.getBiasSize()[3]}, - std::vector{weightsLayout.getBiasStride()[1], - weightsLayout.getBiasStride()[1], - weightsLayout.getBiasStride()[3]}); + miopen::InlineVector{ + 1, 1, weightsLayout.getBiasSize()[2] * weightsLayout.getBiasSize()[3]}, + miopen::InlineVector{weightsLayout.getBiasStride()[1], + weightsLayout.getBiasStride()[1], + weightsLayout.getBiasStride()[3]}); const auto hidden_interim_desc = miopen::TensorDescriptor( rnnDesc.dataType, - std::vector{ + miopen::InlineVector{ 1, reservLayout.getGateBlockSizeImpl()[1], reservLayout.getGateBlockSizeImpl()[3]}, - std::vector{reservLayout.getGateBlockStride()[0], - reservLayout.getGateBlockStride()[1], - reservLayout.getGateBlockStride()[3]}); + miopen::InlineVector{reservLayout.getGateBlockStride()[0], + reservLayout.getGateBlockStride()[1], + reservLayout.getGateBlockStride()[3]}); for(int layer = 0; layer < rnnDesc.nLayers; layer++) { @@ -481,14 +485,14 @@ void RNNForwardDataModularAlgo::PropY(const Handle& handle, const runtimeArgsFwd size_t direc_scale = rnnD.dirMode == miopenRNNbidirection ? 
2 : 1; - const auto dy_normalized_size = - std::vector{1, dy_raw_size[0], direc_scale, dy_raw_size[1] / direc_scale}; + const auto dy_normalized_size = miopen::InlineVector{ + 1, dy_raw_size[0], direc_scale, dy_raw_size[1] / direc_scale}; const auto dy_normalized_stride = - std::vector{dy_normalized_size[1] * dy_raw_stride[0] /*unused*/, - dy_raw_stride[0], - dy_normalized_size[3] * dy_raw_stride[1], - dy_raw_stride[1]}; + miopen::InlineVector{dy_normalized_size[1] * dy_raw_stride[0] /*unused*/, + dy_raw_stride[0], + dy_normalized_size[3] * dy_raw_stride[1], + dy_raw_stride[1]}; auto dy_desc = miopen::TensorDescriptor(rnnD.dataType, dy_normalized_size, dy_normalized_stride); @@ -496,12 +500,13 @@ void RNNForwardDataModularAlgo::PropY(const Handle& handle, const runtimeArgsFwd return std::make_tuple(dy_desc, y); }(yInfo, rnnDesc, runtimeArgs.y); - const std::vector tmp_y_strides = [](const auto& full_stride_ref) { - return std::vector(full_stride_ref.begin(), full_stride_ref.end()); + const miopen::InlineVector tmp_y_strides = [](const auto& full_stride_ref) { + return miopen::InlineVector(full_stride_ref.begin(), full_stride_ref.end()); }(reservLayout.getHiddenStateStride()); - const std::vector tmp_y_size = [](const auto& full_size_ref) { - std::vector ws_ht_layer_size(full_size_ref.begin(), full_size_ref.end()); + const miopen::InlineVector tmp_y_size = [](const auto& full_size_ref) { + miopen::InlineVector ws_ht_layer_size(full_size_ref.begin(), + full_size_ref.end()); ws_ht_layer_size[0] = 1; diff --git a/src/rnn/rnn_util.cpp b/src/rnn/rnn_util.cpp index 0c1b0f7cf9..93951c288f 100644 --- a/src/rnn/rnn_util.cpp +++ b/src/rnn/rnn_util.cpp @@ -50,7 +50,7 @@ void RNNTensorPaddingConverter::ConvertTensorData(const Handle& handle, auto max_batch_size = bsize_per_time[0]; auto vector_size = padded_tensor_desc.GetLengths()[1]; - const std::vector padded_stride //= single_desc.GetStrides(); + const miopen::InlineVector padded_stride //= single_desc.GetStrides(); 
{static_cast(max_batch_size) * vector_size, static_cast(vector_size), 1}; unsigned int left_id = 0; @@ -63,9 +63,9 @@ void RNNTensorPaddingConverter::ConvertTensorData(const Handle& handle, auto copy_seq_cnt = i - left_id; auto copy_bsize = bsize_per_time[left_id]; - const std::vector copy_size{static_cast(copy_seq_cnt), - static_cast(copy_bsize), - static_cast(vector_size)}; + const miopen::InlineVector copy_size{static_cast(copy_seq_cnt), + static_cast(copy_bsize), + static_cast(vector_size)}; auto packed_desc = miopen::TensorDescriptor(padded_tensor_desc.GetType(), copy_size); auto padded_desc = @@ -125,11 +125,11 @@ RNNTensorBaseLayoutConverter::GetSamplesDescendingOrder(const SeqTensorDescripto } void ReorderTensorGPUData(const Handle& handle, - const std::vector& tensor_lens, + const miopen::InlineVector& tensor_lens, int reordering_dim, const std::vector& sample_order, - std::vector src_stride, - std::vector dst_stride, + miopen::InlineVector src_stride, + miopen::InlineVector dst_stride, ConstData_t src, Data_t dst, miopenDataType_t data_type) @@ -137,13 +137,15 @@ void ReorderTensorGPUData(const Handle& handle, if(tensor_lens[reordering_dim] != sample_order.size()) MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor lens"); - auto get_single_samlpe_lens = [](const std::vector& lens, int reordering_dim) { - std::vector new_lens = lens; + auto get_single_samlpe_lens = [](const miopen::InlineVector& lens, + int reordering_dim) { + miopen::InlineVector new_lens = lens; new_lens[reordering_dim] = 1; return new_lens; }; - const std::vector copy_size = get_single_samlpe_lens(tensor_lens, reordering_dim); + const miopen::InlineVector copy_size = + get_single_samlpe_lens(tensor_lens, reordering_dim); const auto src_desc = miopen::TensorDescriptor(data_type, copy_size, src_stride); const auto dst_desc = miopen::TensorDescriptor(data_type, copy_size, dst_stride); @@ -178,8 +180,9 @@ void RNNTensorBaseLayoutConverter::ReorderInputTensorGPUData( // const std::vector 
copy_size = // get_single_samlpe_lens(padded_tensor_desc.GetLengths()); - const std::vector src_stride = padded_tensor_desc.GetPaddedStrides(); - const std::vector dst_stride = dst_padded_tensor_desc.GetPaddedStrides(); + const miopen::InlineVector src_stride = padded_tensor_desc.GetPaddedStrides(); + const miopen::InlineVector dst_stride = + dst_padded_tensor_desc.GetPaddedStrides(); ReorderTensorGPUData(handle, padded_tensor_desc.GetLengths(), @@ -217,8 +220,8 @@ void RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData(const Handle& hand if(lens[reordering_dim] != sample_order.size()) MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor lens"); - const std::vector src_stride = tensor_desc.GetStrides(); - const std::vector dst_stride = tensor_desc.GetStrides(); + const miopen::InlineVector src_stride = tensor_desc.GetStrides(); + const miopen::InlineVector dst_stride = tensor_desc.GetStrides(); ReorderTensorGPUData(handle, lens, @@ -268,11 +271,11 @@ void RNNTensorBaseLayoutConverter::ChangeTensorGPUDataPadding( auto r_it_end = seq_lens_per_sample.rend(); const size_t vector_size = tensor_desc.GetLengths()[2]; - const std::vector padded_stride = tensor_desc.GetPaddedStrides(); + const miopen::InlineVector padded_stride = tensor_desc.GetPaddedStrides(); - auto get_packed_stride = [](const std::vector& copy_size, + auto get_packed_stride = [](const miopen::InlineVector& copy_size, const std::vector& dim_order) { - std::vector byte_strides(copy_size.size()); + miopen::InlineVector byte_strides(copy_size.size()); byte_strides.back() = 1; for(size_t i = byte_strides.size() - 1; i > 0; i--) @@ -315,11 +318,11 @@ void RNNTensorBaseLayoutConverter::ChangeTensorGPUDataPadding( else std::tie(copy_seq_cnt, copy_bsize) = get_box_size_batch_major(it, it_end); - const std::vector copy_size{static_cast(copy_bsize), - static_cast(copy_seq_cnt), - static_cast(vector_size)}; + const miopen::InlineVector copy_size{static_cast(copy_bsize), + static_cast(copy_seq_cnt), + 
static_cast(vector_size)}; - const std::vector packed_stride = + const miopen::InlineVector packed_stride = get_packed_stride(copy_size, tensor_desc.GetLayoutVector()); // Nothing to copy, avoiding error with zero lens in TensorDescriptor @@ -375,15 +378,15 @@ void RNNTensorBaseLayoutConverter::ChangePaddedTensorGPUDataLayout( "Wrong tensor descriptor, Dst data type should match src data type."); } - const std::vector copy_size = src_padded_desc.GetLengths(); + const miopen::InlineVector copy_size = src_padded_desc.GetLengths(); if(dst_padded_desc.GetLengths() != copy_size) { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, Dst desc size should match Src desc size."); } - const std::vector src_stride = src_padded_desc.GetPaddedStrides(); - const std::vector dst_stride = dst_padded_desc.GetPaddedStrides(); + const miopen::InlineVector src_stride = src_padded_desc.GetPaddedStrides(); + const miopen::InlineVector dst_stride = dst_padded_desc.GetPaddedStrides(); auto src_desc = miopen::TensorDescriptor(data_type, copy_size, src_stride); auto dst_desc = miopen::TensorDescriptor(data_type, copy_size, dst_stride); diff --git a/src/seq_tensor.cpp b/src/seq_tensor.cpp index e84e5c10dd..dbac60235f 100644 --- a/src/seq_tensor.cpp +++ b/src/seq_tensor.cpp @@ -40,6 +40,26 @@ namespace miopen { namespace { +template +bool CheckLengths(const miopen::InlineVector& lens) +{ + if(lens.empty()) + return false; + if(!std::all_of(lens.cbegin(), lens.cend(), [](T x) { return x > 0; })) + return false; + return true; +} + +template +bool CheckSequenceLengths(const miopen::InlineVector& lens) +{ + if(lens.empty()) + return false; + if(!std::all_of(lens.cbegin(), lens.cend(), [](T x) { return x >= 0; })) + return false; + return true; +} + template bool CheckLengths(const std::vector& lens) { @@ -60,9 +80,29 @@ bool CheckSequenceLengths(const std::vector& lens) return true; } -std::vector ConvertLengthsOrThrow(const std::vector& lens_in, - const std::string& err_msg, - 
bool is_seq_len = false) +miopen::InlineVector +ConvertLengthsOrThrow(const miopen::InlineVector& lens_in, + const std::string& err_msg, + bool is_seq_len = false) +{ + if(!is_seq_len) + { + if(!CheckLengths(lens_in)) + MIOPEN_THROW(miopenStatusBadParm, err_msg); + } + else + { + if(!CheckSequenceLengths(lens_in)) + MIOPEN_THROW(miopenStatusBadParm, err_msg); + } + + miopen::InlineVector lens(lens_in.cbegin(), lens_in.cend()); + return lens; +} + +std::vector ConvertLengthsOrThrow(const std::vector& lens_in, + const std::string& err_msg, + bool is_seq_len = false) { if(!is_seq_len) { @@ -88,30 +128,31 @@ SeqTensorDescriptor::SeqTensorDescriptor() : packed(true) {} SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in) - : SeqTensorDescriptor(t, std::vector(lens_in)) + : SeqTensorDescriptor(t, miopen::InlineVector(lens_in)) { } -SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& lens_in) +SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, + const miopen::InlineVector& lens_in) : SeqTensorDescriptor(t, GetDefaultLayoutVector(lens_in.size()), lens_in, true) { } SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in) - : SeqTensorDescriptor(t, std::vector(lens_in)) + : SeqTensorDescriptor(t, miopen::InlineVector(lens_in)) { } SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, - const std::vector& lens_in) + const miopen::InlineVector& lens_in) : SeqTensorDescriptor(t, GetDefaultLayoutVector(lens_in.size()), lens_in, true) { } SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, bool with_padded_seq_layout) : SeqTensorDescriptor(t, layout_in, @@ -124,13 +165,14 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, const std::initializer_list& lens_in, bool with_padded_seq_layout) - : 
SeqTensorDescriptor(t, layout_in, std::vector(lens_in), with_padded_seq_layout) + : SeqTensorDescriptor( + t, layout_in, miopen::InlineVector(lens_in), with_padded_seq_layout) { } SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, bool with_padded_seq_layout) : SeqTensorDescriptor(t, layout_in, lens_in, {}, with_padded_seq_layout) { @@ -138,7 +180,7 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, const std::vector& seq_len, const std::vector& padding_marker_in, bool use_seq_len, @@ -156,7 +198,7 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, const std::vector& seq_len, const std::vector& padding_marker_in, bool use_seq_len, @@ -174,8 +216,8 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, - const std::vector& padding_in, + const miopen::InlineVector& lens_in, + const miopen::InlineVector& padding_in, bool with_padded_seq_layout) : SeqTensorDescriptor(t, layout_in, lens_in, {}, padding_in, {}, false, with_padded_seq_layout) { @@ -183,9 +225,9 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, const std::vector& layout_in, - const std::vector& lens_in, + const miopen::InlineVector& lens_in, const std::vector& seq_len, - const std::vector& padding_in, + const miopen::InlineVector& padding_in, const std::vector& padding_marker_in, bool use_seq_len, bool with_padded_seq_layout) @@ -205,7 +247,7 @@ 
SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, if(padding_in.empty()) { - padds = std::vector(dims, 0); + padds = miopen::InlineVector(dims, 0); } else { @@ -234,9 +276,12 @@ void SeqTensorDescriptor::SetDimOrder(const std::vector& dims_orde const std::vector& SeqTensorDescriptor::GetLayoutVector() const { return dim_order; }; -const std::vector& SeqTensorDescriptor::GetLengths() const { return lens; } +const miopen::InlineVector& SeqTensorDescriptor::GetLengths() const { return lens; } -const std::vector& SeqTensorDescriptor::GetPadding() const { return padds; } +const miopen::InlineVector& SeqTensorDescriptor::GetPadding() const +{ + return padds; +} const std::vector& SeqTensorDescriptor::GetSequenceLengthsVector() const { @@ -248,9 +293,9 @@ const std::vector& SeqTensorDescriptor::GetPaddingMarkerHolder() const return padding_marker; } -std::vector SeqTensorDescriptor::GetPaddedStrides() const +miopen::InlineVector SeqTensorDescriptor::GetPaddedStrides() const { - std::vector byte_strides(lens.size()); + miopen::InlineVector byte_strides(lens.size()); byte_strides.back() = 1 + padds.back(); for(size_t i = byte_strides.size() - 1; i > 0; i--) diff --git a/src/solver/mha/mha_solver_backward.cpp b/src/solver/mha/mha_solver_backward.cpp index 5a3e0ee40a..7415d33e0e 100644 --- a/src/solver/mha/mha_solver_backward.cpp +++ b/src/solver/mha/mha_solver_backward.cpp @@ -63,8 +63,9 @@ SplitBufferToWorkspace(size_t S, size_t D, size_t NHS, miopenDataType_t out_type out_type == miopenFloat ? 
0 : NHS * S * get_data_size(out_type)}; // fp8 dS } -MultiBufferWorkspaceTraits SplitBufferToWorkspace(const std::vector& lengths, - miopenDataType_t out_type) +MultiBufferWorkspaceTraits +SplitBufferToWorkspace(const miopen::InlineVector& lengths, + miopenDataType_t out_type) { const auto [N, H, S, D] = miopen::tien<4>(lengths); return SplitBufferToWorkspace(S, D, N * H * S, out_type); diff --git a/src/solver/mha/mha_solver_forward.cpp b/src/solver/mha/mha_solver_forward.cpp index 265adea007..d91dd1c0a8 100644 --- a/src/solver/mha/mha_solver_forward.cpp +++ b/src/solver/mha/mha_solver_forward.cpp @@ -57,7 +57,7 @@ SplitBufferToWorkspace(size_t S, size_t D, size_t NHS, miopenDataType_t out_type NHS * S * get_data_size(out_type)}; // first matmul tensor } -MultiBufferWorkspaceTraits SplitBufferToWorkspace(const std::vector& lengths, +MultiBufferWorkspaceTraits SplitBufferToWorkspace(const miopen::InlineVector& lengths, miopenDataType_t out_type) { const auto [N, H, S, D] = miopen::tien<4>(lengths); diff --git a/src/solver/reduce/forward_argmax.cpp b/src/solver/reduce/forward_argmax.cpp index d3bffd014d..3cfd69e243 100644 --- a/src/solver/reduce/forward_argmax.cpp +++ b/src/solver/reduce/forward_argmax.cpp @@ -39,7 +39,7 @@ namespace solver { namespace reduce { -size_t ArgmaxForward::XGridSize(std::vector indicedims) const +size_t ArgmaxForward::XGridSize(miopen::InlineVector indicedims) const { size_t indice_numel = std::accumulate(indicedims.begin(), indicedims.end(), 1ULL, std::multiplies()); diff --git a/src/solver/reduce/forward_argmin.cpp b/src/solver/reduce/forward_argmin.cpp index 3d7933edc5..22e2b315ce 100644 --- a/src/solver/reduce/forward_argmin.cpp +++ b/src/solver/reduce/forward_argmin.cpp @@ -39,7 +39,7 @@ namespace solver { namespace reduce { -size_t ArgminForward::XGridSize(std::vector indicedims) const +size_t ArgminForward::XGridSize(miopen::InlineVector indicedims) const { size_t indice_numel = std::accumulate(indicedims.begin(), indicedims.end(), 
1ULL, std::multiplies()); diff --git a/src/solver/reduce/forward_max.cpp b/src/solver/reduce/forward_max.cpp index aa58fad86a..dd3071e31a 100644 --- a/src/solver/reduce/forward_max.cpp +++ b/src/solver/reduce/forward_max.cpp @@ -39,7 +39,7 @@ namespace solver { namespace reduce { -size_t MaxForward::XGridSize(std::vector ydims) const +size_t MaxForward::XGridSize(miopen::InlineVector ydims) const { size_t output_numel = std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); diff --git a/src/solver/reduce/forward_min.cpp b/src/solver/reduce/forward_min.cpp index 8b5963030c..99ece8950d 100644 --- a/src/solver/reduce/forward_min.cpp +++ b/src/solver/reduce/forward_min.cpp @@ -39,7 +39,7 @@ namespace solver { namespace reduce { -size_t MinForward::XGridSize(std::vector ydims) const +size_t MinForward::XGridSize(miopen::InlineVector ydims) const { size_t output_numel = std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); diff --git a/src/solver/tensorOp/tensor_op_helpers.hpp b/src/solver/tensorOp/tensor_op_helpers.hpp index cf46c6efe8..d81fdfea0f 100644 --- a/src/solver/tensorOp/tensor_op_helpers.hpp +++ b/src/solver/tensorOp/tensor_op_helpers.hpp @@ -29,6 +29,8 @@ #include #include +#include + #include namespace miopen { @@ -71,8 +73,9 @@ inline std::tuple GetRDBLCKandREADTYPE(size_t len, miopenDa (RD_BLCK == 1) ? 
data_type : data_type + std::to_string(RD_BLCK)); } -inline std::tuple GetBitmapAndWgInfo(const std::vector& blens, - const std::vector& clens) +inline std::tuple +GetBitmapAndWgInfo(const miopen::InlineVector& blens, + const miopen::InlineVector& clens) { // first_not_one is incorrect if btensor size equal to 1 auto first_not_one = std::find_if(blens.rbegin(), blens.rend(), [](int i) { return i != 1; }); diff --git a/src/tensor.cpp b/src/tensor.cpp index a56cffc7b5..29aab2580b 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -99,7 +99,7 @@ std::optional GetDefaultLayout(unsigned num_dims) } template -bool CheckLengths(const std::vector& lens, T maxval = 0) +bool CheckLengths(const miopen::InlineVector& lens, T maxval = 0) { if(lens.empty()) return false; @@ -113,13 +113,14 @@ bool CheckLengths(const std::vector& lens, T maxval = 0) return true; } -std::vector ConvertLengthsOrThrow(const std::vector& lens_in, - [[maybe_unused]] const std::string& err_msg) +miopen::InlineVector +ConvertLengthsOrThrow(const miopen::InlineVector& lens_in, + [[maybe_unused]] const std::string& err_msg) { if(!CheckLengths(lens_in)) MIOPEN_THROW(miopenStatusBadParm, err_msg); - std::vector lens(lens_in.cbegin(), lens_in.cend()); + miopen::InlineVector lens(lens_in.cbegin(), lens_in.cend()); return lens; } @@ -157,10 +158,10 @@ std::size_t GetVectorLengthForLayout(const std::optional& return vector_length; } -void ReorderVector(std::vector& lens, const std::initializer_list& indices) +void ReorderVector(miopen::InlineVector& lens, + const std::initializer_list& indices) { - std::vector out_lens; - out_lens.reserve(indices.size()); + miopen::InlineVector out_lens(indices.size()); for(size_t index : indices) { assert(index < lens.size()); @@ -170,7 +171,7 @@ void ReorderVector(std::vector& lens, const std::initializer_list& lens) +void VectLensReorder(miopenTensorLayout_t layout, miopen::InlineVector& lens) { switch(layout) { @@ -190,7 +191,7 @@ void VectLensReorder(miopenTensorLayout_t 
layout, std::vector& lens) // Relevant for NCHWc and CHWNc void VectLensRecalc(miopenTensorLayout_t layout, std::size_t vector_length, - std::vector& lens) + miopen::InlineVector& lens) { unsigned c_pos; @@ -212,8 +213,8 @@ void VectLensRecalc(miopenTensorLayout_t layout, } void CalculateStrides(std::size_t vector_length, - const std::vector& lens, - std::vector& strides) + const miopen::InlineVector& lens, + miopen::InlineVector& strides) { if(lens.empty()) MIOPEN_THROW(miopenStatusInternalError); @@ -228,8 +229,8 @@ void CalculateStrides(std::size_t vector_length, void SetStrides(const std::optional& layout, std::size_t vector_length, - const std::vector& lens, - std::vector& strides) + const miopen::InlineVector& lens, + miopen::InlineVector& strides) { const bool is_vectorized = vector_length > 1; if(!layout || layout == miopenTensorNCHW || layout == miopenTensorNCDHW || is_vectorized) @@ -245,7 +246,7 @@ void SetStrides(const std::optional& layout, } } -bool CheckDimsFitIntoInt(const std::vector& v) +bool CheckDimsFitIntoInt(const miopen::InlineVector& v) { if(std::any_of( v.cbegin(), v.cend(), [](std::size_t x) { return x > std::numeric_limits::max(); })) @@ -265,11 +266,12 @@ TensorDescriptor::TensorDescriptor(miopenDataType_t t) : packed(true), type(t) { // code for better dependency tracking TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in) - : TensorDescriptor(t, std::vector(lens_in)) + : TensorDescriptor(t, miopen::InlineVector(lens_in)) { } -TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::vector& lens_in) +TensorDescriptor::TensorDescriptor(miopenDataType_t t, const miopen::InlineVector& +lens_in) : TensorDescriptor(t, GetDefaultLayout(lens_in.size()), ConvertLengthsOrThrow(lens_in, "Lengths must be > 0"), @@ -280,23 +282,25 @@ TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::vector& l TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in) 
- : TensorDescriptor(t, std::vector(lens_in)) + : TensorDescriptor(t, miopen::InlineVector(lens_in)) { } -TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::vector& lens_in) +TensorDescriptor::TensorDescriptor(miopenDataType_t t, + const miopen::InlineVector& lens_in) : TensorDescriptor(t, GetDefaultLayout(lens_in.size()), lens_in, {}, false) { } -TensorDescriptor::TensorDescriptor(miopenDataType_t t, std::vector&& lens_in) +TensorDescriptor::TensorDescriptor(miopenDataType_t t, + miopen::InlineVector&& lens_in) : TensorDescriptor(t, GetDefaultLayout(lens_in.size()), std::move(lens_in), {}, false) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - const std::vector& lens_in) + const miopen::InlineVector& lens_in) : TensorDescriptor(t, layout_in, ConvertLengthsOrThrow(lens_in, "Lengths must be > 0")) { } @@ -304,27 +308,27 @@ TensorDescriptor::TensorDescriptor(miopenDataType_t t, TensorDescriptor::TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, const std::initializer_list& lens_in) - : TensorDescriptor(t, layout_in, std::vector(lens_in)) + : TensorDescriptor(t, layout_in, miopen::InlineVector(lens_in)) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - const std::vector& lens_in) + const miopen::InlineVector& lens_in) : TensorDescriptor(t, layout_in, lens_in, {}, false) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - std::vector&& lens_in) + miopen::InlineVector&& lens_in) : TensorDescriptor(t, layout_in, std::move(lens_in), {}, false) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, - const std::vector& lens_in, - const std::vector& strides_in) + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in) : TensorDescriptor(t, ConvertLengthsOrThrow(lens_in, "Lengths must be > 0"), ConvertLengthsOrThrow(strides_in, "Strides must be > 0")) @@ -334,36 +338,38 @@ 
TensorDescriptor::TensorDescriptor(miopenDataType_t t, TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::initializer_list& lens_in, const std::initializer_list& strides_in) - : TensorDescriptor(t, std::vector(lens_in), std::vector(strides_in)) + : TensorDescriptor(t, + miopen::InlineVector(lens_in), + miopen::InlineVector(strides_in)) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, - const std::vector& lens_in, - const std::vector& strides_in) + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in) : TensorDescriptor(t, std::nullopt, lens_in, strides_in, true) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, - std::vector&& lens_in, - std::vector&& strides_in) + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in) : TensorDescriptor(t, std::nullopt, std::move(lens_in), std::move(strides_in), true) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - const std::vector& lens_in, - const std::vector& strides_in) + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in) : TensorDescriptor(t, layout_in, lens_in, strides_in, true) { } TensorDescriptor::TensorDescriptor(miopenDataType_t t, miopenTensorLayout_t layout_in, - std::vector&& lens_in, - std::vector&& strides_in) + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in) : TensorDescriptor(t, layout_in, std::move(lens_in), std::move(strides_in), true) { } @@ -371,11 +377,11 @@ TensorDescriptor::TensorDescriptor(miopenDataType_t t, // Main private constructor TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::optional& layout_in, - const std::vector& lens_in, - const std::vector& strides_in, + const miopen::InlineVector& lens_in, + const miopen::InlineVector& strides_in, bool use_strides) : lens(lens_in), - strides(use_strides ? strides_in : std::vector()), + strides(use_strides ? 
strides_in : miopen::InlineVector()), type(t), tensorLayout(layout_in) { @@ -384,11 +390,11 @@ TensorDescriptor::TensorDescriptor(miopenDataType_t t, TensorDescriptor::TensorDescriptor(miopenDataType_t t, const std::optional& layout_in, - std::vector&& lens_in, - std::vector&& strides_in, + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in, bool use_strides) : lens(std::move(lens_in)), - strides(use_strides ? std::move(strides_in) : std::vector()), + strides(use_strides ? std::move(strides_in) : miopen::InlineVector()), type(t), tensorLayout(layout_in) { @@ -449,7 +455,9 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, const int* if(plens == nullptr || size <= 0) MIOPEN_THROW(miopenStatusInvalidValue); - return {t, std::vector(plens, plens + size)}; + return {t, + ConvertLengthsOrThrow(miopen::InlineVector(plens, plens + size), + "Lengths must be > 0")}; } TensorDescriptor @@ -458,7 +466,7 @@ TensorDescriptor::MakeDescriptor(miopenDataType_t t, const std::size_t* plens, i if(plens == nullptr || size <= 0) MIOPEN_THROW(miopenStatusInvalidValue); - return {t, std::vector(plens, plens + size)}; + return {t, miopen::InlineVector(plens, plens + size)}; } TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, @@ -469,7 +477,10 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, if(plens == nullptr || size <= 0) MIOPEN_THROW(miopenStatusInvalidValue); - return {t, layout, std::vector(plens, plens + size)}; + return {t, + layout, + ConvertLengthsOrThrow(miopen::InlineVector(plens, plens + size), + "Lengths must be > 0")}; } TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, @@ -480,7 +491,7 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, if(plens == nullptr || size <= 0) MIOPEN_THROW(miopenStatusInvalidValue); - return {t, layout, std::vector(plens, plens + size)}; + return {t, layout, miopen::InlineVector(plens, plens + size)}; } TensorDescriptor 
TensorDescriptor::MakeDescriptor(miopenDataType_t t, @@ -491,7 +502,11 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, if(plens == nullptr || pstrides == nullptr || size <= 0) MIOPEN_THROW(miopenStatusInvalidValue); - return {t, std::vector(plens, plens + size), std::vector(pstrides, pstrides + size)}; + return {t, + ConvertLengthsOrThrow(miopen::InlineVector(plens, plens + size), + "Lengths must be > 0"), + ConvertLengthsOrThrow(miopen::InlineVector(pstrides, pstrides + size), + "Lengths must be > 0")}; } TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, @@ -503,15 +518,15 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, MIOPEN_THROW(miopenStatusInvalidValue); return {t, - std::vector(plens, plens + size), - std::vector(pstrides, pstrides + size)}; + miopen::InlineVector(plens, plens + size), + miopen::InlineVector(pstrides, pstrides + size)}; } bool TensorDescriptor::IsVectorized() const { return vector_length > 1; } -const std::vector& TensorDescriptor::GetLengths() const { return lens; } +const miopen::InlineVector& TensorDescriptor::GetLengths() const { return lens; } -const std::vector& TensorDescriptor::GetStrides() const { return strides; } +const miopen::InlineVector& TensorDescriptor::GetStrides() const { return strides; } unsigned TensorDescriptor::GetNumDims() const { return lens.size(); } @@ -712,10 +727,11 @@ bool TensorDescriptor::IsPossibleLayout4D5D(const std::string& layout) const } // See https://github.com/ROCm/MIOpen/pull/765#discussion_r596465551 -std::vector TensorDescriptor::find_permutation(const std::vector& lens, - const std::vector& strides) +miopen::InlineVector +TensorDescriptor::find_permutation(const miopen::InlineVector& lens, + const miopen::InlineVector& strides) { - std::vector result(lens.size()); + miopen::InlineVector result(lens.size()); std::iota(result.begin(), result.end(), 0); std::stable_sort(result.begin(), result.end(), by(std::greater<>{}, [&](auto 
x) { return std::make_tuple(strides[x], lens[x]); @@ -883,8 +899,8 @@ TensorDescriptor GetFlattenedTensorDescriptor(const TensorDescriptor& desc) return {desc.GetType(), {desc.GetElementSize()}, {static_cast(1)}}; // start flattening tensor - std::vector flat_lengths; - std::vector flat_strides; + miopen::InlineVector flat_lengths; + miopen::InlineVector flat_strides; auto non1_length_strides = boost::combine(desc.GetLengths(), desc.GetStrides()) | boost::adaptors::filtered(f_length_is_not_1_t()); @@ -938,7 +954,8 @@ struct two_exp_ceiling_t } }; -static std::vector get_worker_sizes(const std::vector& data_sizes) +static std::vector +get_worker_sizes(const miopen::InlineVector& data_sizes) { const std::size_t dim = data_sizes.size(); @@ -1156,7 +1173,7 @@ void ScaleTensor(const Handle& handle, std::string kernel_name = "SubTensorOpWithScalar" + std::to_string(yDim_flat) + "d"; - const std::vector& lens = yDesc_flat.GetLengths(); + const miopen::InlineVector& lens = yDesc_flat.GetLengths(); std::string network_config = "scale " + std::to_string(yDesc_flat.GetType()); for(auto& len : lens) @@ -1332,7 +1349,7 @@ void CopyTensor(const Handle& handle, { std::string kernel_name = "SubTensorOpWithSubTensor" + std::to_string(srcDim_flat) + "d"; - const std::vector& lens = srcDesc_flat.GetLengths(); + const miopen::InlineVector& lens = srcDesc_flat.GetLengths(); std::string network_config = "copy " + std::to_string(srcDesc_flat.GetType()); for(auto& len : lens) @@ -1549,7 +1566,7 @@ void CastTensor(const Handle& handle, { std::string kernel_name = "SubTensorOpWithCastTensor" + std::to_string(srcDim_flat) + "d"; - const std::vector& lens = srcDesc_flat.GetLengths(); + const miopen::InlineVector& lens = srcDesc_flat.GetLengths(); // TODO: make proper network config std::string network_config = "cast " + std::to_string(srcDesc_flat.GetType()) + @@ -1853,7 +1870,7 @@ void TransformTensor(const Handle& handle, std::string kernel_name = "SubTensorOpWithTransform" + 
std::to_string(yDim_flat) + "d"; - const std::vector& lens = yDesc_flat.GetLengths(); + const miopen::InlineVector& lens = yDesc_flat.GetLengths(); std::string network_config = "transform " + std::to_string(yDesc_flat.GetType()); for(auto& len : lens) diff --git a/test/conv_common.hpp b/test/conv_common.hpp index 3ab28952ac..741ec5f29b 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -740,10 +740,10 @@ struct verify_forward_conv : conv_base bool is_transform = (input.desc.GetLengths()[1] % 4 != 0 || is_vect); - std::vector in_len(input.desc.GetLengths().begin(), - input.desc.GetLengths().end()); - std::vector wei_len(weights.desc.GetLengths().begin(), - weights.desc.GetLengths().end()); + miopen::InlineVector in_len(input.desc.GetLengths().begin(), + input.desc.GetLengths().end()); + miopen::InlineVector wei_len(weights.desc.GetLengths().begin(), + weights.desc.GetLengths().end()); in_len[1] = ((in_len[1] + 3) / 4) * 4; wei_len[1] = ((wei_len[1] + 3) / 4) * 4; @@ -1657,10 +1657,10 @@ struct verify_forward_conv_int8 : conv_base bool is_transform = (input.desc.GetLengths()[1] % 4 != 0 || is_vect); - std::vector in_len(input.desc.GetLengths().begin(), - input.desc.GetLengths().end()); - std::vector wei_len(weights.desc.GetLengths().begin(), - weights.desc.GetLengths().end()); + miopen::InlineVector in_len(input.desc.GetLengths().begin(), + input.desc.GetLengths().end()); + miopen::InlineVector wei_len(weights.desc.GetLengths().begin(), + weights.desc.GetLengths().end()); in_len[1] = ((in_len[1] + 3) / 4) * 4; wei_len[1] = ((wei_len[1] + 3) / 4) * 4; @@ -1796,8 +1796,8 @@ struct conv_driver : test_driver std::string conv_mode; std::string pad_mode; std::vector spatial_dim_elements{}; - std::vector input_dims{}; - std::vector weight_tensor_dims{}; + miopen::InlineVector input_dims{}; + miopen::InlineVector weight_tensor_dims{}; std::vector filter_dims{}; std::size_t batch_size{}; std::size_t input_channels{}; @@ -2081,8 +2081,8 @@ struct conv_driver : 
test_driver // but this requires the dimensions come from commandline, which is hard for non-NCHW layout if(in_layout != "NCHW" && in_layout != "NCDHW") { - const std::vector dim_lens = input.desc.GetLengths(); - std::vector dim_strides; + const miopen::InlineVector dim_lens = input.desc.GetLengths(); + miopen::InlineVector dim_strides; miopen::tensor_layout_to_strides( dim_lens, miopen::tensor_layout_get_default(weights.desc.GetNumDims()), @@ -2093,8 +2093,8 @@ struct conv_driver : test_driver } if(fil_layout != "NCHW" && fil_layout != "NCDHW" && fil_layout != "CHWN") { - const std::vector dim_lens = weights.desc.GetLengths(); - std::vector dim_strides; + const miopen::InlineVector dim_lens = weights.desc.GetLengths(); + miopen::InlineVector dim_strides; miopen::tensor_layout_to_strides( dim_lens, miopen::tensor_layout_get_default(weights.desc.GetNumDims()), diff --git a/test/cpu_reduce_util.hpp b/test/cpu_reduce_util.hpp index 01accf6650..3550a79a09 100644 --- a/test/cpu_reduce_util.hpp +++ b/test/cpu_reduce_util.hpp @@ -310,7 +310,51 @@ get_all_indexes(const std::vector& dimLengths, int dim, std::vector -static T get_offset_from_index(const std::vector& strides, const std::vector& index) +static void get_all_indexes(const miopen::InlineVector& dimLengths, + int dim, + std::vector>& indexes) +{ + if(dim < dimLengths.size()) + { + std::vector> updated_indexes; + + if(dim == 0) + { + assert(indexes.empty()); + assert(dimLengths[dim] > 0); + for(T i = 0; i < dimLengths[dim]; i++) + { + std::vector index = {i}; + + updated_indexes.push_back(index); + }; + } + else + { + // go through all the current indexes + for(const auto& index : indexes) + { + for(T i = 0; i < dimLengths[dim]; i++) + { + auto index_new = index; + index_new.push_back(i); + + updated_indexes.push_back(index_new); + }; + } + }; + + // update to the indexes (output) + indexes = updated_indexes; + + // further to construct the indexes from the updated status + get_all_indexes(dimLengths, dim + 1, 
indexes); + }; +}; + +template +static T get_offset_from_index(const miopen::InlineVector& strides, + const std::vector& index) { T offset = 0; @@ -345,4 +389,27 @@ static T get_flatten_offset(const std::vector& lengths, const std::vector& return (offset); }; +template +static T get_flatten_offset(const miopen::InlineVector& lengths, const std::vector& index) +{ + T offset = 0; + + assert(lengths.size() == index.size() && !lengths.empty()); + + int len = lengths.size(); + T stride = 1; + + // for len==1, the loop is not executed + for(int i = len - 1; i > 0; i--) + { + offset += stride * index[i]; + + stride *= lengths[i]; + }; + + offset += stride * index[0]; + + return (offset); +}; + #endif diff --git a/test/dropout_util.hpp b/test/dropout_util.hpp index 983b335775..5d2978f1b9 100644 --- a/test/dropout_util.hpp +++ b/test/dropout_util.hpp @@ -68,14 +68,14 @@ inline void InitKernelStateEmulator(std::vector& states, } template -inline void ExpandTensorDim(std::vector x_len, - std::vector x_str, - std::vector y_len, - std::vector y_str, - std::vector& in_len, - std::vector& in_str, - std::vector& out_len, - std::vector& out_str) +inline void ExpandTensorDim(miopen::InlineVector x_len, + miopen::InlineVector x_str, + miopen::InlineVector y_len, + miopen::InlineVector y_str, + miopen::InlineVector& in_len, + miopen::InlineVector& in_str, + miopen::InlineVector& out_len, + miopen::InlineVector& out_str) { int xl_idx = x_len.size() - 1; int yl_idx = y_len.size() - 1; @@ -122,10 +122,10 @@ void DropoutForwardVerify(const miopen::Handle& handle, auto dropout_rate = DropoutDesc.dropout; // support up to 5D tensor - std::vector in_len(5, 1); - std::vector in_str(5, 1); - std::vector out_len(5, 1); - std::vector out_str(5, 1); + miopen::InlineVector in_len(5, 1); + miopen::InlineVector in_str(5, 1); + miopen::InlineVector out_len(5, 1); + miopen::InlineVector out_str(5, 1); ExpandTensorDim(inputTensor.GetLengths(), inputTensor.GetStrides(), @@ -191,10 +191,10 @@ void 
DropoutBackwardVerify(const miopen::DropoutDescriptor& DropoutDesc, auto dropout_rate = DropoutDesc.dropout; // support up to 5D tensor - std::vector in_len(5, 1); - std::vector in_str(5, 1); - std::vector out_len(5, 1); - std::vector out_str(5, 1); + miopen::InlineVector in_len(5, 1); + miopen::InlineVector in_str(5, 1); + miopen::InlineVector out_len(5, 1); + miopen::InlineVector out_str(5, 1); ExpandTensorDim(inputTensor.GetLengths(), inputTensor.GetStrides(), diff --git a/test/gpu_reference_kernel.cpp b/test/gpu_reference_kernel.cpp index 41f19a2204..b26ea5734e 100644 --- a/test/gpu_reference_kernel.cpp +++ b/test/gpu_reference_kernel.cpp @@ -275,7 +275,7 @@ static std::string miopen_type_to_string(miopenDataType_t type) /// input: a vector of lengths of dims in a tensor /// multiply each element with a random constant integer -void pad_tensor_strides(std::vector& strides) +void pad_tensor_strides(miopen::InlineVector& strides) { constexpr int min_stride_multiplier = 1; constexpr int max_stride_multiplier = 5; @@ -320,13 +320,13 @@ struct gpu_reference_conv_2d : gpu_reference_kernel_base int wo = conv_out_size(wi, px, dx, fx, sx); int c_per_group = c / g; - std::vector in_len({n, c, hi, wi}); - std::vector wei_len({k, c_per_group, fy, fx}); - std::vector out_len({n, k, ho, wo}); + miopen::InlineVector in_len({n, c, hi, wi}); + miopen::InlineVector wei_len({k, c_per_group, fy, fx}); + miopen::InlineVector out_len({n, k, ho, wo}); - std::vector in_strides; - std::vector wei_strides; - std::vector out_strides; + miopen::InlineVector in_strides; + miopen::InlineVector wei_strides; + miopen::InlineVector out_strides; std::string layout_default = miopen::tensor_layout_get_default(4); std::string layout_string = miopen::TensorDescriptor::LayoutEnumToStr(tensor_layout); @@ -688,13 +688,13 @@ struct gpu_reference_conv_3d : gpu_reference_kernel_base int do_ = conv_out_size(di, pz, dz, fz, sz); int c_per_group = c / g; - std::vector in_len({n, c, di, hi, wi}); - 
std::vector wei_len({k, c_per_group, fz, fy, fx}); - std::vector out_len({n, k, do_, ho, wo}); + miopen::InlineVector in_len({n, c, di, hi, wi}); + miopen::InlineVector wei_len({k, c_per_group, fz, fy, fx}); + miopen::InlineVector out_len({n, k, do_, ho, wo}); - std::vector in_strides; - std::vector wei_strides; - std::vector out_strides; + miopen::InlineVector in_strides; + miopen::InlineVector wei_strides; + miopen::InlineVector out_strides; std::string layout_default = miopen::tensor_layout_get_default(5); std::string layout_string = miopen::TensorDescriptor::LayoutEnumToStr(tensor_layout); diff --git a/test/gru_common.hpp b/test/gru_common.hpp index 3e0c405c73..397d253478 100644 --- a/test/gru_common.hpp +++ b/test/gru_common.hpp @@ -1994,13 +1994,13 @@ struct verify_forward_infer_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -2284,13 +2284,13 @@ struct verify_forward_train_gru std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -2585,13 +2585,13 @@ struct verify_backward_data_gru Workspace rspace{}; rspace.Write(reserveSpace); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 
2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -2846,7 +2846,7 @@ struct verify_backward_weights_gru auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; @@ -3066,7 +3066,7 @@ struct gru_basic_driver : test_driver std::vector dhyin(hx_sz); size_t wei_bytes = 0; - std::vector inlens(2, 0); + miopen::InlineVector inlens(2, 0); inlens.at(0) = batchSeq.at(0); inlens.at(1) = inVecReal; auto firstInputDesc = miopen::TensorDescriptor(miopen::deref(rnnDesc).dataType, inlens); diff --git a/test/gtest/bad_fusion_plan.cpp b/test/gtest/bad_fusion_plan.cpp index 4b7ad74765..96c19f2f40 100644 --- a/test/gtest/bad_fusion_plan.cpp +++ b/test/gtest/bad_fusion_plan.cpp @@ -59,8 +59,8 @@ struct ConvTestCaseFusion << " stride_x:" << tc.stride_x << " dilation_y:" << tc.dilation_y << " dilation_x:" << tc.dilation_x << " )"; } - std::vector GetInput() const { return {N, C, H, W}; } - std::vector GetWeights() const { return {k, C, y, x}; } + miopen::InlineVector GetInput() const { return {N, C, H, W}; } + miopen::InlineVector GetWeights() const { return {k, C, y, x}; } miopen::ConvolutionDescriptor GetConv() const { return miopen::ConvolutionDescriptor{ diff --git a/test/gtest/binary_tensor_ops.cpp b/test/gtest/binary_tensor_ops.cpp index ae3a06330b..9c9a66404a 100644 --- a/test/gtest/binary_tensor_ops.cpp +++ b/test/gtest/binary_tensor_ops.cpp @@ -73,24 +73,30 @@ class GPU_binaryTensorOps : public ::testing::TestWithParam ASSERT_GE(dstSuperTensor.desc.GetNumDims(), lens.size()); - const std::vector& 
dstSuperStrides = dstSuperTensor.desc.GetStrides(); - std::vector dstStrides(dstSuperStrides.begin() + - (dstSuperTensor.desc.GetNumDims() - lens.size()), - dstSuperStrides.end()); - - dstDesc = miopen::TensorDescriptor(miopen_type{}, lens, dstStrides); + const miopen::InlineVector& dstSuperStrides = dstSuperTensor.desc.GetStrides(); + miopen::InlineVector dstStrides( + dstSuperStrides.begin() + (dstSuperTensor.desc.GetNumDims() - lens.size()), + dstSuperStrides.end()); + + dstDesc = + miopen::TensorDescriptor(miopen_type{}, + miopen::InlineVector(lens.begin(), lens.end()), + dstStrides); dstDataSize = dstDesc.GetElementSpace() + offsets[1]; ASSERT_GE(srcSuperTensor.desc.GetElementSpace(), dstDataSize); ASSERT_GE(srcSuperTensor.desc.GetNumDims(), lens.size()); - const std::vector& srcSuperStrides = srcSuperTensor.desc.GetStrides(); - std::vector srcStrides(srcSuperStrides.begin() + - (srcSuperTensor.desc.GetNumDims() - lens.size()), - srcSuperStrides.end()); + const miopen::InlineVector& srcSuperStrides = srcSuperTensor.desc.GetStrides(); + miopen::InlineVector srcStrides( + srcSuperStrides.begin() + (srcSuperTensor.desc.GetNumDims() - lens.size()), + srcSuperStrides.end()); - srcDesc = miopen::TensorDescriptor(miopen_type{}, lens, srcStrides); + srcDesc = + miopen::TensorDescriptor(miopen_type{}, + miopen::InlineVector(lens.begin(), lens.end()), + srcStrides); srcDataSize = srcDesc.GetElementSpace() + offsets[0]; ASSERT_GE(srcSuperTensor.desc.GetElementSpace(), srcDataSize); diff --git a/test/gtest/conv3d_test_case.hpp b/test/gtest/conv3d_test_case.hpp index a10c1809ca..b2183e88cd 100644 --- a/test/gtest/conv3d_test_case.hpp +++ b/test/gtest/conv3d_test_case.hpp @@ -66,8 +66,8 @@ struct Conv3DTestCase << " conv_mode:" << tc.conv_mode; } - std::vector GetInput() { return {N, C, img.z, img.y, img.x}; } - std::vector GetWeights() + miopen::InlineVector GetInput() { return {N, C, img.z, img.y, img.x}; } + miopen::InlineVector GetWeights() { EXPECT_EQUAL(C % G, 0); 
return {K, C / G, filter.z, filter.y, filter.x}; diff --git a/test/gtest/conv_tensor_gen.hpp b/test/gtest/conv_tensor_gen.hpp index 8dce82371f..73d8cb8ffe 100644 --- a/test/gtest/conv_tensor_gen.hpp +++ b/test/gtest/conv_tensor_gen.hpp @@ -107,7 +107,7 @@ template struct GenConvData { /// \note CHWNc filter layout is not supported (different storage layout) - GenConvData(const std::vector& filter, unsigned group_count = 1) + GenConvData(const miopen::InlineVector& filter, unsigned group_count = 1) { static_assert(std::is_integral_v == std::is_integral_v); static_assert(sizeof(Tacc) >= sizeof(T)); diff --git a/test/gtest/graphapi_capi_mha_common.hpp b/test/gtest/graphapi_capi_mha_common.hpp index e98fa1abe9..bfb6e49a59 100644 --- a/test/gtest/graphapi_capi_mha_common.hpp +++ b/test/gtest/graphapi_capi_mha_common.hpp @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -703,9 +704,9 @@ class MhaCommonTest : public testing::TestWithParam dims = {n, h, s, d}; miopen::TensorDescriptor td(dtype, {n, h, s, d}); - const std::vector& tdStrides = td.GetStrides(); + const miopen::InlineVector& tdStrides = td.GetStrides(); - std::vector strides(tdStrides.size()); + miopen::InlineVector strides(tdStrides.size()); std::copy_n(tdStrides.begin(), tdStrides.size(), strides.begin()); if(transpose) diff --git a/test/gtest/graphapi_conv_bias_res_add_activ_fwd.cpp b/test/gtest/graphapi_conv_bias_res_add_activ_fwd.cpp index 95ecfa97d0..030a960917 100644 --- a/test/gtest/graphapi_conv_bias_res_add_activ_fwd.cpp +++ b/test/gtest/graphapi_conv_bias_res_add_activ_fwd.cpp @@ -418,7 +418,8 @@ class GPU_ConvBiasResAddActivation_fwd convInvalidCWeightTensorDesc.GetLengths(), convInvalidCWeightTensorDesc.GetStrides())); - std::vector allOnes{size_t{1}, size_t{1}, size_t{1}, size_t{1}, size_t{1}}; + miopen::InlineVector allOnes{ + size_t{1}, size_t{1}, size_t{1}, size_t{1}, size_t{1}}; auto convOutput = allocator.allocate(gr::makeTensor(convOutputName, dataType, allOnes, allOnes)); 
diff --git a/test/gtest/graphapi_mha_bwd.cpp b/test/gtest/graphapi_mha_bwd.cpp index b49f79900c..bde2d3583d 100644 --- a/test/gtest/graphapi_mha_bwd.cpp +++ b/test/gtest/graphapi_mha_bwd.cpp @@ -38,11 +38,11 @@ class GPU_MhaBwdGraphTest_FP32 : public MhaGraphTestBase { mGraphBuilder = std::make_unique(); - std::vector nhsd = {n, h, s, d}; - std::vector nhds = {n, h, d, s}; - std::vector nhss = {n, h, s, s}; - std::vector nhs1 = {n, h, s, 1}; - std::vector all1s = {1, 1, 1, 1}; + miopen::InlineVector nhsd = {n, h, s, d}; + miopen::InlineVector nhds = {n, h, d, s}; + miopen::InlineVector nhss = {n, h, s, s}; + miopen::InlineVector nhs1 = {n, h, s, 1}; + miopen::InlineVector all1s = {1, 1, 1, 1}; MAKE_TENSOR_F(Q, nhsd, false); MAKE_TENSOR_F(K, nhsd, false); diff --git a/test/gtest/graphapi_mha_cpp_common.hpp b/test/gtest/graphapi_mha_cpp_common.hpp index ac5b991e82..e6d1560e83 100644 --- a/test/gtest/graphapi_mha_cpp_common.hpp +++ b/test/gtest/graphapi_mha_cpp_common.hpp @@ -286,8 +286,9 @@ class MhaGraphTestBase } template - gr::Tensor* - makeTensor(std::string_view name, miopenDataType_t dt, const std::vector& dims) + gr::Tensor* makeTensor(std::string_view name, + miopenDataType_t dt, + const miopen::InlineVector& dims) { auto ptr = mAlloc.allocate(gr::makeTensor(name, dt, dims)); if constexpr(!IsVirt) diff --git a/test/gtest/graphapi_mha_fwd.cpp b/test/gtest/graphapi_mha_fwd.cpp index d64477e557..38e12418ff 100644 --- a/test/gtest/graphapi_mha_fwd.cpp +++ b/test/gtest/graphapi_mha_fwd.cpp @@ -36,10 +36,10 @@ class GPU_MhaFwdGraph_FP32 : public MhaGraphTestBase mGraphBuilder = std::make_unique(); - std::vector nhsd = {n, h, s, d}; - std::vector nhss = {n, h, s, s}; - std::vector nhs1 = {n, h, s, 1}; - std::vector all1s = {1, 1, 1, 1}; + miopen::InlineVector nhsd = {n, h, s, d}; + miopen::InlineVector nhss = {n, h, s, s}; + miopen::InlineVector nhs1 = {n, h, s, 1}; + miopen::InlineVector all1s = {1, 1, 1, 1}; MAKE_TENSOR_F(Q, nhsd, false); MAKE_TENSOR_F(K, nhsd, 
false); diff --git a/test/gtest/graphapi_operationgraph_descriptor.cpp b/test/gtest/graphapi_operationgraph_descriptor.cpp index a56485b3ff..0188b94e02 100644 --- a/test/gtest/graphapi_operationgraph_descriptor.cpp +++ b/test/gtest/graphapi_operationgraph_descriptor.cpp @@ -51,13 +51,13 @@ class GMockNode : public OpNode public: GMockNode() : mIn(std::make_shared(miopenFloat, - std::vector{8, 64, 64}, - std::vector{64 * 64, 64, 1}, + miopen::InlineVector{8, 64, 64}, + miopen::InlineVector{64 * 64, 64, 1}, ++id, false)), mOut(std::make_shared(miopenFloat, - std::vector{8, 64, 64}, - std::vector{64 * 64, 64, 1}, + miopen::InlineVector{8, 64, 64}, + miopen::InlineVector{64 * 64, 64, 1}, ++id, false)) { diff --git a/test/gtest/graphapi_tensor.cpp b/test/gtest/graphapi_tensor.cpp index 12fdef66c7..a1add0fd2f 100644 --- a/test/gtest/graphapi_tensor.cpp +++ b/test/gtest/graphapi_tensor.cpp @@ -236,8 +236,8 @@ namespace graph_api_tensor_test { static bool TestIsApplicable() { return true; } -using TestCase = std::tuple, - std::vector, +using TestCase = std::tuple, + miopen::InlineVector, miopenDataType_t, std::optional>; static std::vector TestConfigs() @@ -264,8 +264,8 @@ class CPU_GraphTensor_NONE : public ::testing::TestWithParam void Run() { - std::vector dimensions; - std::vector strides; + miopen::InlineVector dimensions; + miopen::InlineVector strides; miopenDataType_t dataType; std::optional layout; diff --git a/test/gtest/group_conv.hpp b/test/gtest/group_conv.hpp index cbe63e55b8..bc080e8c63 100644 --- a/test/gtest/group_conv.hpp +++ b/test/gtest/group_conv.hpp @@ -75,8 +75,8 @@ struct GroupConvTestConfig<2u> << " dilation.y:" << tc.dilation.y << " dilation.x" << tc.dilation.x; } - std::vector GetInput() { return {N, C, img.y, img.x}; } - std::vector GetWeights() + miopen::InlineVector GetInput() { return {N, C, img.y, img.x}; } + miopen::InlineVector GetWeights() { EXPECT_EQUAL(C % G, 0); return {K, C / G, filter.y, filter.x}; diff --git 
a/test/gtest/inline_vector_basic_ops.cpp b/test/gtest/inline_vector_basic_ops.cpp index 1b10a4c222..b8cf9dbd90 100644 --- a/test/gtest/inline_vector_basic_ops.cpp +++ b/test/gtest/inline_vector_basic_ops.cpp @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -200,3 +201,188 @@ TEST(CPU_InlineVectorClear_NONE, Test) in_v12.clear(); EXPECT_EQ(in_v12.size(), 0); } + +TEST(CPU_InlineVectorInsert_NONE, Test) +{ + miopen::InlineVector iv13_1{1, 2, 3}; + std::vector v13_1{1, 2, 3}; + iv13_1.insert(iv13_1.begin(), 0); + v13_1.insert(v13_1.begin(), 0); + for(int i = 0; i < iv13_1.size(); i++) + { + EXPECT_EQ(iv13_1[i], v13_1[i]); + } + + miopen::InlineVector iv13_2{1, 2, 3}; + std::vector v13_2{1, 2, 3}; + iv13_2.insert(iv13_2.end(), 4); + v13_2.insert(v13_2.end(), 4); + for(int i = 0; i < iv13_2.size(); i++) + { + EXPECT_EQ(iv13_2[i], v13_2[i]); + } + + miopen::InlineVector iv13_3{1, 2, 3, 4}; + std::vector v13_3{1, 2, 3, 4}; + iv13_3.insert(iv13_3.begin() + 2, 0); + v13_3.insert(v13_3.begin() + 2, 0); + for(int i = 0; i < iv13_3.size(); i++) + { + EXPECT_EQ(iv13_3[i], v13_3[i]); + } + + miopen::InlineVector iv13_4{1, 2, 3}; + std::vector v13_4{1, 2, 3}; + iv13_4.insert(iv13_4.begin() + iv13_4.size(), 4); + v13_4.insert(v13_4.begin() + v13_4.size(), 4); + for(int i = 0; i < iv13_4.size(); i++) + { + EXPECT_EQ(iv13_4[i], v13_4[i]); + } +} + +#include + +TEST(CPU_InlineVectorPerf1_NONE, Test) +{ + std::vector iv_times; + std::vector v_times; + + for(int i = 0; i < 1000; i++) + { + auto start = std::chrono::steady_clock::now(); + miopen::InlineVector iv{1, 2, 3, 4, 5}; + auto end = std::chrono::steady_clock::now(); + auto elapsed = + std::chrono::duration_cast>(end - start) + .count(); + iv_times.push_back(elapsed); + + auto start1 = std::chrono::steady_clock::now(); + std::vector v{1, 2, 3, 4, 5}; + auto end1 = std::chrono::steady_clock::now(); + auto elapsed1 = + std::chrono::duration_cast>(end1 - start1) + .count(); + v_times.push_back(elapsed1); + } + + 
std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) + << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; + std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) + << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; +} + +TEST(CPU_InlineVectorPerf2_NONE, Test) +{ + std::vector iv_times; + std::vector v_times; + + std::initializer_list il{1, 2, 3, 4, 5}; + + for(int i = 0; i < 1000; i++) + { + auto start = std::chrono::steady_clock::now(); + miopen::InlineVector iv(il); + auto end = std::chrono::steady_clock::now(); + auto elapsed = + std::chrono::duration_cast>(end - start) + .count(); + iv_times.push_back(elapsed); + + auto start1 = std::chrono::steady_clock::now(); + std::vector v(il); + auto end1 = std::chrono::steady_clock::now(); + auto elapsed1 = + std::chrono::duration_cast>(end1 - start1) + .count(); + v_times.push_back(elapsed1); + } + + std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) + << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; + std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) + << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; +} + +TEST(CPU_InlineVectorPerf3_NONE, Test) +{ + std::vector iv_times; + std::vector v_times; + + std::initializer_list il{1, 2, 3, 4, 5}; + size_t sum = 0; + + for(int i = 0; i < 1000; i++) + { + sum = 0; + auto start = std::chrono::steady_clock::now(); + miopen::InlineVector iv(il.begin(), il.end()); + for(int j = 0; j < iv.size(); j++) + { + sum += iv[j]; + } + auto end = std::chrono::steady_clock::now(); + auto elapsed = + std::chrono::duration_cast>(end - start) + .count(); + iv_times.push_back(elapsed); + sum = 0; + auto start1 = std::chrono::steady_clock::now(); + std::vector v(il.begin(), il.end()); + for(int j = 0; j < v.size(); j++) + { + sum += v[j]; + } + auto end1 = 
std::chrono::steady_clock::now(); + auto elapsed1 = + std::chrono::duration_cast>(end1 - start1) + .count(); + v_times.push_back(elapsed1); + } + + std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) + << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; + std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) + << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; +} + +TEST(CPU_InlineVectorPerf4_NONE, Test) +{ + std::vector iv_times; + std::vector v_times; + + std::initializer_list il{1, 2, 3, 4, 5}; + size_t sum = 0; + + for(int i = 0; i < 1000; i++) + { + sum = 0; + auto start = std::chrono::steady_clock::now(); + miopen::InlineVector iv(il); + auto first_not_one = std::find_if(iv.rbegin(), iv.rend(), [](int j) { return j != 1; }); + auto d = std::distance(iv.begin(), first_not_one.base()); + int work_per_wg = std::accumulate(iv.begin() + d, iv.end(), 1, std::multiplies()); + auto end = std::chrono::steady_clock::now(); + auto elapsed = + std::chrono::duration_cast>(end - start) + .count(); + iv_times.push_back(elapsed); + sum = 0; + auto start1 = std::chrono::steady_clock::now(); + std::vector v(il); + auto first_not_one1 = std::find_if(v.rbegin(), v.rend(), [](int j) { return j != 1; }); + auto d1 = std::distance(v.begin(), first_not_one1.base()); + int work_per_wg1 = std::accumulate(v.begin() + d1, v.end(), 1, std::multiplies()); + auto end1 = std::chrono::steady_clock::now(); + auto elapsed1 = + std::chrono::duration_cast>(end1 - start1) + .count(); + v_times.push_back(elapsed1); + } + + std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) + << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; + std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) + << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; +} diff --git 
a/test/gtest/kthvalue.hpp b/test/gtest/kthvalue.hpp index 2aa7e6fd41..055dbb7d59 100644 --- a/test/gtest/kthvalue.hpp +++ b/test/gtest/kthvalue.hpp @@ -134,7 +134,8 @@ struct KthvalueFwdTest : public ::testing::TestWithParam std::fill(outputHost.begin(), outputHost.end(), 0); // miopenDataType_t doesn't support size_t, I use double instead (both types use 64 bits) - indicesDesc = miopen::TensorDescriptor(miopenDouble, outDims); + indicesDesc = miopen::TensorDescriptor( + miopenDouble, miopen::InlineVector(outDims.begin(), outDims.end())); size_t outputSize = indicesDesc.GetElementSize(); indices.resize(outputSize); indicesHost.resize(outputSize); diff --git a/test/gtest/layout_transpose.cpp b/test/gtest/layout_transpose.cpp index 25e5c54c20..14765cdb05 100644 --- a/test/gtest/layout_transpose.cpp +++ b/test/gtest/layout_transpose.cpp @@ -260,12 +260,12 @@ struct LayoutTransposeTest_2D : public ::testing::TestWithParam tensor_len = {static_cast(n), - static_cast(c), - static_cast(h), - static_cast(w)}; + miopen::InlineVector tensor_len = {static_cast(n), + static_cast(c), + static_cast(h), + static_cast(w)}; - std::vector tensor_strides; + miopen::InlineVector tensor_strides; std::string layout_default = miopen::tensor_layout_get_default(tensor_len.size()); std::string layout_string = @@ -348,13 +348,13 @@ struct LayoutTransposeTest_3D : public ::testing::TestWithParam tensor_len = {static_cast(n), - static_cast(c), - static_cast(d), - static_cast(h), - static_cast(w)}; + miopen::InlineVector tensor_len = {static_cast(n), + static_cast(c), + static_cast(d), + static_cast(h), + static_cast(w)}; - std::vector tensor_strides; + miopen::InlineVector tensor_strides; std::string layout_default = miopen::tensor_layout_get_default(tensor_len.size()); std::string layout_string = diff --git a/test/gtest/nonpack_conv3d_fwd.hpp b/test/gtest/nonpack_conv3d_fwd.hpp index 76836683bd..16f1b887e9 100644 --- a/test/gtest/nonpack_conv3d_fwd.hpp +++ b/test/gtest/nonpack_conv3d_fwd.hpp 
@@ -44,9 +44,9 @@ struct NonPackTestCase : Conv3DTestCase size_t o2; size_t o3; size_t o4; - std::vector GetInputStrides() { return {i0, i1, i2, i3, i4}; } - std::vector GetWeightStrides() { return {w0, w1, w2, w3, w4}; } - std::vector GetOutputStrides() { return {o0, o1, o2, o3, o4}; } + miopen::InlineVector GetInputStrides() { return {i0, i1, i2, i3, i4}; } + miopen::InlineVector GetWeightStrides() { return {w0, w1, w2, w3, w4}; } + miopen::InlineVector GetOutputStrides() { return {o0, o1, o2, o3, o4}; } }; template <> diff --git a/test/gtest/reduceextreme.hpp b/test/gtest/reduceextreme.hpp index deed82116b..a3764ceb27 100644 --- a/test/gtest/reduceextreme.hpp +++ b/test/gtest/reduceextreme.hpp @@ -48,7 +48,7 @@ void cpu_extreme_forward(tensor input, miopenReduceExtremeOp_t reduceExtremeOp) { auto input_dims = input.desc.GetLengths(); - std::vector output_dims; + miopen::InlineVector output_dims; if((reduceExtremeOp == MIOPEN_REDUCE_EXTREME_MAX) || reduceExtremeOp == MIOPEN_REDUCE_EXTREME_MIN) diff --git a/test/gtest/ternary_tensor_ops.cpp b/test/gtest/ternary_tensor_ops.cpp index 74af9180fe..be3fae773b 100644 --- a/test/gtest/ternary_tensor_ops.cpp +++ b/test/gtest/ternary_tensor_ops.cpp @@ -28,43 +28,44 @@ #include "gtest_common.hpp" namespace { -std::vector> tensorALensArr = {{32, 16, 8, 4, 4}, // tensor A - {16, 20, 16, 8}, - {20, 16, 8}, - {1, 16, 8}, - {16, 8}, - {8}}; - -std::vector> tensorBLensArr = {{32, 16, 8, 4, 4}, // tensor B - {32, 16, 1, 1, 1}, - {1, 16, 8, 1, 1}, - {1, 1, 8, 4, 1}, - {16, 20, 16, 8}, - {16, 20, 16, 1}, - {16, 20, 1, 1}, - {16, 1, 1, 1}, - {1, 20, 16, 8}, - {1, 20, 16, 1}, - {1, 20, 1, 1}, - {1, 1, 16, 8}, - {1, 1, 1, 8}, - {20, 16, 8}, - {20, 16, 1}, - {1, 16, 8}, - {1, 16, 1}, - {20, 1, 1}, - {16, 8}, - {16, 1}, - {1, 8}, - {8}, - {1}}; +std::vector> tensorALensArr = {{32, 16, 8, 4, 4}, // tensor A + {16, 20, 16, 8}, + {20, 16, 8}, + {1, 16, 8}, + {16, 8}, + {8}}; + +std::vector> tensorBLensArr = {{32, 16, 8, 4, 4}, // tensor B + 
{32, 16, 1, 1, 1}, + {1, 16, 8, 1, 1}, + {1, 1, 8, 4, 1}, + {16, 20, 16, 8}, + {16, 20, 16, 1}, + {16, 20, 1, 1}, + {16, 1, 1, 1}, + {1, 20, 16, 8}, + {1, 20, 16, 1}, + {1, 20, 1, 1}, + {1, 1, 16, 8}, + {1, 1, 1, 8}, + {20, 16, 8}, + {20, 16, 1}, + {1, 16, 8}, + {1, 16, 1}, + {20, 1, 1}, + {16, 8}, + {16, 1}, + {1, 8}, + {8}, + {1}}; std::vector> offsetsArr = { {0, 0, 0}, {64, 32, 16}, {32, 16, 32}, {32, 16, 32}}; std::vector> alphabetaArr = {{1, 1, 0}, {-1, 1, 1}, {1.0, 0.5, 0.3}}; -std::vector> stridesArr = {{8 * 16 * 20 * 16, 8 * 16 * 20, 8 * 16, 8, 1}}; +std::vector> stridesArr = { + {8 * 16 * 20 * 16, 8 * 16 * 20, 8 * 16, 8, 1}}; std::vector packedArr = {true, false}; @@ -74,12 +75,12 @@ std::vector operationArr = { struct TestCase { - std::vector tensorlens_ac; - std::vector tensorlens_b; + miopen::InlineVector tensorlens_ac; + miopen::InlineVector tensorlens_b; std::vector offsets; - std::vector stride_a; - std::vector stride_b; - std::vector stride_c; + miopen::InlineVector stride_a; + miopen::InlineVector stride_b; + miopen::InlineVector stride_c; std::vector alphabeta; bool packed; miopenTensorOp_t operation; @@ -113,8 +114,8 @@ struct TensorOpsCommon : public testing::TestWithParam testCase.tensorlens_ac, testCase.stride_c, testCase.offsets[2], testCase.packed); } - tensor CreateTensor(const std::vector& lens, - const std::vector& strides, + tensor CreateTensor(const miopen::InlineVector& lens, + const miopen::InlineVector& strides, int64_t offset, bool isPacked) { @@ -122,8 +123,8 @@ struct TensorOpsCommon : public testing::TestWithParam if(!isPacked) { - std::vector real_strides(strides.begin() + (strides.size() - lens.size()), - strides.end()); + miopen::InlineVector real_strides( + strides.begin() + (strides.size() - lens.size()), strides.end()); auto r = tensor{lens, real_strides}.generate(tensor_elem_gen_integer{max_value}); r.data.resize(r.data.size() + offset); return r; @@ -253,8 +254,8 @@ using GPU_TernaryTensorOps_FP16 = TensorOpsCommon; using 
GPU_TernaryTensorOps_FP64 = TensorOpsCommon; namespace { -bool checkTensorsCompatibility(const std::vector& tensorALens, - const std::vector& tensorBLens) +bool checkTensorsCompatibility(const miopen::InlineVector& tensorALens, + const miopen::InlineVector& tensorBLens) { if(tensorALens.size() != tensorBLens.size()) { @@ -273,8 +274,8 @@ bool checkTensorsCompatibility(const std::vector& tensorALens, } void AddTestCases(std::vector& testCases, - const std::vector& tensorALens, - const std::vector& tensorBLens) + const miopen::InlineVector& tensorALens, + const miopen::InlineVector& tensorBLens) { const auto& stride_a = stridesArr[0]; const auto& stride_b = stridesArr[0]; @@ -292,8 +293,8 @@ void AddTestCases(std::vector& testCases, final_offsets = offsets; } - auto checkStride = [p = packed](const std::vector& lens, - const std::vector& strides) { + auto checkStride = [p = packed](const miopen::InlineVector& lens, + const miopen::InlineVector& strides) { if(p) return true; diff --git a/test/gtest/unary_tensor_ops.cpp b/test/gtest/unary_tensor_ops.cpp index 7ee680aef2..aed12000aa 100644 --- a/test/gtest/unary_tensor_ops.cpp +++ b/test/gtest/unary_tensor_ops.cpp @@ -60,12 +60,13 @@ class GPU_unaryTensorOps : public ::testing::TestWithParam const auto& [lens, offset] = GetParam(); ASSERT_GE(superTensor.desc.GetNumDims(), lens.size()); - const std::vector& superStrides = superTensor.desc.GetStrides(); - std::vector strides(superStrides.begin() + - (superTensor.desc.GetNumDims() - lens.size()), - superStrides.end()); + const miopen::InlineVector& superStrides = superTensor.desc.GetStrides(); + miopen::InlineVector strides(superStrides.begin() + + (superTensor.desc.GetNumDims() - lens.size()), + superStrides.end()); - subDesc = miopen::TensorDescriptor(miopen_type{}, lens, strides); + subDesc = miopen::TensorDescriptor( + miopen_type{}, miopen::InlineVector(lens.begin(), lens.end()), strides); dataSize = subDesc.GetElementSpace() + offset; 
ASSERT_GE(superTensor.desc.GetElementSpace(), dataSize); } diff --git a/test/gtest/unit_TensorDescriptor.hpp b/test/gtest/unit_TensorDescriptor.hpp index f6a425395e..c252971e0b 100644 --- a/test/gtest/unit_TensorDescriptor.hpp +++ b/test/gtest/unit_TensorDescriptor.hpp @@ -33,29 +33,30 @@ namespace unit_tests { struct TensorDescriptorParams { - TensorDescriptorParams(miopenDataType_t datatype_in, std::vector&& lens_in) + TensorDescriptorParams(miopenDataType_t datatype_in, + miopen::InlineVector&& lens_in) : datatype(datatype_in), lens(std::move(lens_in)) { } TensorDescriptorParams(miopenDataType_t datatype_in, miopenTensorLayout_t layout_in, - std::vector&& lens_in) + miopen::InlineVector&& lens_in) : datatype(datatype_in), layout(layout_in), lens(std::move(lens_in)) { } TensorDescriptorParams(miopenDataType_t datatype_in, - std::vector&& lens_in, - std::vector&& strides_in) + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in) : datatype(datatype_in), lens(std::move(lens_in)), strides(std::move(strides_in)) { } TensorDescriptorParams(miopenDataType_t datatype_in, miopenTensorLayout_t layout_in, - std::vector&& lens_in, - std::vector&& strides_in) + miopen::InlineVector&& lens_in, + miopen::InlineVector&& strides_in) : datatype(datatype_in), layout(layout_in), lens(std::move(lens_in)), @@ -65,7 +66,7 @@ struct TensorDescriptorParams std::size_t GetNumDims() const { return lens.size(); } - const std::vector& GetLens() const { return lens; } + const miopen::InlineVector& GetLens() const { return lens; } miopenDataType_t GetDataType() const { return datatype; } @@ -102,8 +103,8 @@ struct TensorDescriptorParams private: miopenDataType_t datatype; std::optional layout; - std::vector lens; - std::vector strides; + miopen::InlineVector lens; + miopen::InlineVector strides; }; } // namespace unit_tests diff --git a/test/gtest/unit_conv_solver.cpp b/test/gtest/unit_conv_solver.cpp index 9bea24ce11..a258515fe6 100644 --- a/test/gtest/unit_conv_solver.cpp 
+++ b/test/gtest/unit_conv_solver.cpp @@ -81,8 +81,8 @@ bool IsDeviceSupported(Gpu supported_devs, Gpu dev) ConvTestCase::ConvTestCase() : x(miopenHalf, {}), w(miopenHalf, {}), conv({}, {}, {}){}; -ConvTestCase::ConvTestCase(std::vector&& x_, - std::vector&& w_, +ConvTestCase::ConvTestCase(miopen::InlineVector&& x_, + miopen::InlineVector&& w_, std::vector&& pad_, std::vector&& stride_, std::vector&& dilation_, @@ -98,8 +98,8 @@ ConvTestCase::ConvTestCase(std::vector&& x_, { } -ConvTestCase::ConvTestCase(std::vector&& x_, - std::vector&& w_, +ConvTestCase::ConvTestCase(miopen::InlineVector&& x_, + miopen::InlineVector&& w_, std::vector&& pad_, std::vector&& stride_, std::vector&& dilation_, diff --git a/test/gtest/unit_conv_solver.hpp b/test/gtest/unit_conv_solver.hpp index 864d554bdd..e33bfffd7a 100644 --- a/test/gtest/unit_conv_solver.hpp +++ b/test/gtest/unit_conv_solver.hpp @@ -43,15 +43,15 @@ struct ConvTestCase { ConvTestCase(); - ConvTestCase(std::vector&& x, - std::vector&& w, + ConvTestCase(miopen::InlineVector&& x, + miopen::InlineVector&& w, std::vector&& pad, std::vector&& stride, std::vector&& dilation, miopenDataType_t type); - ConvTestCase(std::vector&& x, - std::vector&& w, + ConvTestCase(miopen::InlineVector&& x, + miopen::InlineVector&& w, std::vector&& pad, std::vector&& stride, std::vector&& dilation, diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index 85d17dc138..a30f460927 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -566,13 +566,13 @@ struct verify_forward_infer_lstm : verify_forward_lstm auto cy = initCell; std::fill(cy.begin(), cy.end(), 0.); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 
2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -935,13 +935,13 @@ struct verify_forward_train_lstm : verify_forward_lstm std::fill(cy.begin(), cy.end(), 0.); auto cy_dev = handle.Write(cy); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -1262,13 +1262,13 @@ verify_backward_data_lstm::gpu() const auto dyin_dev = handle.Write(dy); auto weights_dev = handle.Write(weights); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -1459,7 +1459,7 @@ std::vector verify_backward_weights_lstm::gpu() const auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); - std::vector hlens(3, 0); + miopen::InlineVector hlens(1, 0); hlens[0] = nLayers * (dirMode != 0 ? 
2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; @@ -1670,7 +1670,7 @@ struct lstm_basic_driver : test_driver std::vector dcyin(hx_sz); size_t wei_bytes = 0; - std::vector inlens(2, 0); + miopen::InlineVector inlens(2, 0); inlens.at(0) = batchSeq.at(0); inlens.at(1) = inVecReal; auto firstInputDesc = miopen::TensorDescriptor(miopen::deref(rnnDesc).dataType, inlens); diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index 231b635a63..31237300bf 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -692,7 +692,8 @@ struct pooling_driver : test_driver } } - auto input_desc = miopen::TensorDescriptor(this->type, in_shape); + auto input_desc = miopen::TensorDescriptor( + this->type, miopen::InlineVector(in_shape.begin(), in_shape.end())); if(spt_dim != 2 && spt_dim != 3) { diff --git a/test/rnn_seq_api.hpp b/test/rnn_seq_api.hpp index 81077aa26b..3e538b92bc 100644 --- a/test/rnn_seq_api.hpp +++ b/test/rnn_seq_api.hpp @@ -1397,7 +1397,7 @@ inline size_t get_RNN_params_byteSize(miopen::Handle& handle, miopen::SeqTensorDescriptor& inTensor) { auto& in_lens = inTensor.GetLengths(); - const std::vector in_dims = {in_lens[0], in_lens[2]}; + const miopen::InlineVector in_dims = {in_lens[0], in_lens[2]}; miopen::TensorDescriptor baseInputDesc(rnnDesc.dataType, in_dims); size_t wei_bytes = 0; diff --git a/test/rnn_util.hpp b/test/rnn_util.hpp index 2c95b7ec56..b4f7d02039 100644 --- a/test/rnn_util.hpp +++ b/test/rnn_util.hpp @@ -71,7 +71,7 @@ template inline void HiddenTensorReorder(const std::vector& src_array, std::vector& dst_array, const std::vector& batch_order, - const std::vector hid_len, + const miopen::InlineVector hid_len, bool is_dst_direct_order) { const size_t copy_size = hid_len[2]; diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index 3a9c427eac..41eecb76c8 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -1458,13 +1458,13 @@ struct verify_forward_infer_rnn 
std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -1734,13 +1734,13 @@ struct verify_forward_train_rnn std::fill(hy.begin(), hy.end(), 0.); auto hy_dev = handle.Write(hy); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -2017,13 +2017,13 @@ struct verify_backward_data_rnn auto weights_dev = handle.Write(weights); // auto hx_dev = handle.Write(initHidden); - std::vector hlens(3, 0); + miopen::InlineVector hlens(1, 0); hlens[0] = nLayers * ((dirMode != 0) ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; miopen::TensorDescriptor hiddenDesc(miopen::deref(rnnDesc).dataType, hlens); - std::vector wlen(1, 0); + miopen::InlineVector wlen(1, 0); wlen[0] = weights.size(); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, wlen); @@ -2270,7 +2270,7 @@ struct verify_backward_weights_rnn auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); - std::vector hlens(3, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 
2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; @@ -2488,7 +2488,7 @@ struct rnn_basic_vanilla_driver : test_driver dhyin.resize(hx_sz); size_t wei_bytes = 0; - std::vector inlens(2, 0); + miopen::InlineVector inlens(2, 0); inlens.at(0) = batchSeq.at(0); inlens.at(1) = inVecReal; auto firstInputDesc = miopen::TensorDescriptor(miopen::deref(rnnDesc).dataType, inlens); diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp index ff9566fe6c..10fa713c11 100644 --- a/test/tensor_holder.hpp +++ b/test/tensor_holder.hpp @@ -161,25 +161,63 @@ struct tensor #endif template - tensor(const std::vector& dims) : desc(miopen_type{}, dims), data(desc.GetElementSpace()) + tensor(const std::vector& dims) + : desc(miopen_type{}, miopen::InlineVector(dims.begin(), dims.end())), + data(desc.GetElementSpace()) { } template tensor(const std::vector& dims, const std::vector& strides) - : desc(miopen_type{}, dims, strides), data(desc.GetElementSpace()) + : desc(miopen_type{}, + miopen::InlineVector(dims.begin(), dims.end()), + miopen::InlineVector(strides.begin(), strides.end())), + data(desc.GetElementSpace()) { assert(dims.size() == strides.size()); } template tensor(miopenTensorLayout_t layout, const std::vector& dims) - : desc(miopen_type{}, layout, dims), data(desc.GetElementSpace()) + : desc(miopen_type{}, layout, miopen::InlineVector(dims.begin(), dims.end())), + data(desc.GetElementSpace()) { } template tensor(miopenTensorLayout_t layout, const std::vector& dims, const std::vector& strides) + : desc(miopen_type{}, + layout, + miopen::InlineVector(dims.begin(), dims.end()), + miopen::InlineVector(strides.begin(), strides.end())), + data(desc.GetElementSpace()) + { + assert(dims.size() == strides.size()); + } + + template + tensor(const miopen::InlineVector& dims) + : desc(miopen_type{}, dims), data(desc.GetElementSpace()) + { + } + + template + tensor(const miopen::InlineVector& dims, const miopen::InlineVector& strides) + : desc(miopen_type{}, dims, strides), 
data(desc.GetElementSpace()) + { + assert(dims.size() == strides.size()); + } + + template + tensor(miopenTensorLayout_t layout, const miopen::InlineVector& dims) + : desc(miopen_type{}, layout, dims), data(desc.GetElementSpace()) + { + } + + template + tensor(miopenTensorLayout_t layout, + const miopen::InlineVector& dims, + const miopen::InlineVector& strides) : desc(miopen_type{}, layout, dims, strides), data(desc.GetElementSpace()) { assert(dims.size() == strides.size()); @@ -396,15 +434,19 @@ void serialize(std::istream& s, tensor& x) serialize(s, lens); std::vector strides; serialize(s, strides); - x.desc = miopen::TensorDescriptor{miopen_type{}, lens, strides}; + x.desc = + miopen::TensorDescriptor{miopen_type{}, + miopen::InlineVector(lens.begin(), lens.end()), + miopen::InlineVector(strides.begin(), strides.end())}; serialize(s, x.data); } template void serialize(std::ostream& s, const tensor& x) { - const auto& lens = x.desc.GetLengths(); - const auto& strides = x.desc.GetStrides(); + const std::vector lens(x.desc.GetLengths().begin(), x.desc.GetLengths().end()); + const std::vector strides(x.desc.GetStrides().begin(), x.desc.GetStrides().end()); + // how to make InlineVector serializable? 
serialize(s, lens); serialize(s, strides); serialize(s, x.data); diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index 1cfd41936d..f0722d0458 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -331,12 +331,12 @@ struct tensor_reorder_driver : tensor_reorder_base_driver uint32_t order_2, uint32_t order_3) { int tensor_sz = dim_0 * dim_1 * dim_2 * dim_3; - std::vector tensor_len({static_cast(dim_0), - static_cast(dim_1), - static_cast(dim_2), - static_cast(dim_3)}); + miopen::InlineVector tensor_len({static_cast(dim_0), + static_cast(dim_1), + static_cast(dim_2), + static_cast(dim_3)}); - std::vector tensor_strides; + miopen::InlineVector tensor_strides; std::string layout_default = miopen::tensor_layout_get_default(4); std::string layout_string = miopen::TensorDescriptor::LayoutEnumToStr(miopenTensorNCHW); diff --git a/test/tensor_transform.cpp b/test/tensor_transform.cpp index c67491b5e6..ee83b0f2d4 100644 --- a/test/tensor_transform.cpp +++ b/test/tensor_transform.cpp @@ -367,12 +367,14 @@ struct tensor_transform_driver : test_driver // Test tensor layout transform srcSuper_pad = tensor{srcLens}.generate(tensor_elem_gen_integer{max_value}); dstSuper_depad = tensor{srcLens}.generate(tensor_elem_gen_integer{max_value}); - srcDesc = miopen::TensorDescriptor(this->type, srcLens); + srcDesc = miopen::TensorDescriptor( + this->type, miopen::InlineVector(srcLens.begin(), srcLens.end())); srcLens[1] = (srcLens[1] % 4 == 0) ? 
srcLens[1] : ((srcLens[1] + 3) / 4) * 4; dstSuper_pad = tensor{srcLens}.generate(tensor_elem_gen_integer{max_value}); srcSuper_depad = tensor{srcLens}.generate(tensor_elem_gen_integer{max_value}); - dstDesc = miopen::TensorDescriptor(this->type, srcLens); + dstDesc = miopen::TensorDescriptor( + this->type, miopen::InlineVector(srcLens.begin(), srcLens.end())); if(srcDesc.GetLengths().size() == dstDesc.GetLengths().size()) { @@ -397,17 +399,23 @@ struct tensor_transform_driver : test_driver printf("\n DST: \n"); show_tensor(super_dst); #endif - std::vector superStrides_src = super_src.desc.GetStrides(); - std::vector superStrides_dst = super_dst.desc.GetStrides(); - std::vector subStrides_src(superStrides_src.begin() + - (super_src.desc.GetNumDims() - subLens.size()), - superStrides_src.end()); - std::vector subStrides_dst(superStrides_dst.begin() + - (super_dst.desc.GetNumDims() - subLens.size()), - superStrides_dst.end()); - - subDesc_src = miopen::TensorDescriptor(this->type, subLens, subStrides_src); - subDesc_dst = miopen::TensorDescriptor(this->type, subLens, subStrides_dst); + miopen::InlineVector superStrides_src = super_src.desc.GetStrides(); + miopen::InlineVector superStrides_dst = super_dst.desc.GetStrides(); + miopen::InlineVector subStrides_src( + superStrides_src.begin() + (super_src.desc.GetNumDims() - subLens.size()), + superStrides_src.end()); + miopen::InlineVector subStrides_dst( + superStrides_dst.begin() + (super_dst.desc.GetNumDims() - subLens.size()), + superStrides_dst.end()); + + subDesc_src = + miopen::TensorDescriptor(this->type, + miopen::InlineVector(srcLens.begin(), srcLens.end()), + subStrides_src); + subDesc_dst = + miopen::TensorDescriptor(this->type, + miopen::InlineVector(srcLens.begin(), srcLens.end()), + subStrides_dst); verify_equals(verify_tensor_transform_scale{ super_src, subDesc_src, super_dst, subDesc_dst, offset, offset, T(alpha), T(beta)}); From 7158bbfa75c4dc33bc72c98dcba6b36e4cee4f1c Mon Sep 17 00:00:00 2001 From: 
novakovicdj Date: Tue, 11 Feb 2025 17:45:49 +0200 Subject: [PATCH 2/6] forgotten change --- src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp b/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp index 2b3cdcf8ee..b77718b921 100644 --- a/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp +++ b/src/include/miopen/rnn/algorithms/dynamic_algo_utils.hpp @@ -145,7 +145,7 @@ class MaskedPow2Range } // namespace rnn_dynamic -inline std::vector roundedDynamicLengths(const SeqTensorDescriptor& desc) +inline miopen::InlineVector roundedDynamicLengths(const SeqTensorDescriptor& desc) { auto src_lens = desc.GetLengths(); auto real_seq_len = src_lens[1]; From 2ce03ffc2a22668167a5033246bec1fc07f3d357 Mon Sep 17 00:00:00 2001 From: novakovicdj Date: Wed, 12 Feb 2025 12:43:46 +0200 Subject: [PATCH 3/6] remove InlineVector perf tests --- test/gtest/inline_vector_basic_ops.cpp | 146 ------------------------- 1 file changed, 146 deletions(-) diff --git a/test/gtest/inline_vector_basic_ops.cpp b/test/gtest/inline_vector_basic_ops.cpp index b8cf9dbd90..0527ff2f85 100644 --- a/test/gtest/inline_vector_basic_ops.cpp +++ b/test/gtest/inline_vector_basic_ops.cpp @@ -240,149 +240,3 @@ TEST(CPU_InlineVectorInsert_NONE, Test) EXPECT_EQ(iv13_4[i], v13_4[i]); } } - -#include - -TEST(CPU_InlineVectorPerf1_NONE, Test) -{ - std::vector iv_times; - std::vector v_times; - - for(int i = 0; i < 1000; i++) - { - auto start = std::chrono::steady_clock::now(); - miopen::InlineVector iv{1, 2, 3, 4, 5}; - auto end = std::chrono::steady_clock::now(); - auto elapsed = - std::chrono::duration_cast>(end - start) - .count(); - iv_times.push_back(elapsed); - - auto start1 = std::chrono::steady_clock::now(); - std::vector v{1, 2, 3, 4, 5}; - auto end1 = std::chrono::steady_clock::now(); - auto elapsed1 = - std::chrono::duration_cast>(end1 - start1) - .count(); - 
v_times.push_back(elapsed1); - } - - std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) - << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; - std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) - << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; -} - -TEST(CPU_InlineVectorPerf2_NONE, Test) -{ - std::vector iv_times; - std::vector v_times; - - std::initializer_list il{1, 2, 3, 4, 5}; - - for(int i = 0; i < 1000; i++) - { - auto start = std::chrono::steady_clock::now(); - miopen::InlineVector iv(il); - auto end = std::chrono::steady_clock::now(); - auto elapsed = - std::chrono::duration_cast>(end - start) - .count(); - iv_times.push_back(elapsed); - - auto start1 = std::chrono::steady_clock::now(); - std::vector v(il); - auto end1 = std::chrono::steady_clock::now(); - auto elapsed1 = - std::chrono::duration_cast>(end1 - start1) - .count(); - v_times.push_back(elapsed1); - } - - std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) - << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; - std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) - << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; -} - -TEST(CPU_InlineVectorPerf3_NONE, Test) -{ - std::vector iv_times; - std::vector v_times; - - std::initializer_list il{1, 2, 3, 4, 5}; - size_t sum = 0; - - for(int i = 0; i < 1000; i++) - { - sum = 0; - auto start = std::chrono::steady_clock::now(); - miopen::InlineVector iv(il.begin(), il.end()); - for(int j = 0; j < iv.size(); j++) - { - sum += iv[j]; - } - auto end = std::chrono::steady_clock::now(); - auto elapsed = - std::chrono::duration_cast>(end - start) - .count(); - iv_times.push_back(elapsed); - sum = 0; - auto start1 = std::chrono::steady_clock::now(); - std::vector v(il.begin(), il.end()); - for(int j = 0; j < v.size(); j++) - { 
- sum += v[j]; - } - auto end1 = std::chrono::steady_clock::now(); - auto elapsed1 = - std::chrono::duration_cast>(end1 - start1) - .count(); - v_times.push_back(elapsed1); - } - - std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) - << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; - std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) - << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; -} - -TEST(CPU_InlineVectorPerf4_NONE, Test) -{ - std::vector iv_times; - std::vector v_times; - - std::initializer_list il{1, 2, 3, 4, 5}; - size_t sum = 0; - - for(int i = 0; i < 1000; i++) - { - sum = 0; - auto start = std::chrono::steady_clock::now(); - miopen::InlineVector iv(il); - auto first_not_one = std::find_if(iv.rbegin(), iv.rend(), [](int j) { return j != 1; }); - auto d = std::distance(iv.begin(), first_not_one.base()); - int work_per_wg = std::accumulate(iv.begin() + d, iv.end(), 1, std::multiplies()); - auto end = std::chrono::steady_clock::now(); - auto elapsed = - std::chrono::duration_cast>(end - start) - .count(); - iv_times.push_back(elapsed); - sum = 0; - auto start1 = std::chrono::steady_clock::now(); - std::vector v(il); - auto first_not_one1 = std::find_if(v.rbegin(), v.rend(), [](int j) { return j != 1; }); - auto d1 = std::distance(v.begin(), first_not_one1.base()); - int work_per_wg1 = std::accumulate(v.begin() + d1, v.end(), 1, std::multiplies()); - auto end1 = std::chrono::steady_clock::now(); - auto elapsed1 = - std::chrono::duration_cast>(end1 - start1) - .count(); - v_times.push_back(elapsed1); - } - - std::cout << "IV min: " << *(std::min_element(iv_times.begin(), iv_times.end())) - << " avg: " << std::reduce(iv_times.begin(), iv_times.end()) / 1000.0 << std::endl; - std::cout << "VE min: " << *(std::min_element(v_times.begin(), v_times.end())) - << " avg: " << std::reduce(v_times.begin(), v_times.end()) / 1000.0 << std::endl; 
-} From 37ccc64d25e4fd7f7a8aa907173f9a7072d5e6ce Mon Sep 17 00:00:00 2001 From: novakovicdj Date: Wed, 12 Feb 2025 13:12:35 +0200 Subject: [PATCH 4/6] code tyding --- src/include/miopen/inline_vector.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/miopen/inline_vector.hpp b/src/include/miopen/inline_vector.hpp index 49d853adf8..4d249a5e53 100644 --- a/src/include/miopen/inline_vector.hpp +++ b/src/include/miopen/inline_vector.hpp @@ -266,7 +266,6 @@ class InlineVector int idx = std::distance(begin(), pos); if(idx < 0 || idx > real_size) { - std::cout << idx << " " << real_size << std::endl; MIOPEN_THROW("Cannot insert data at this position"); } real_size += 1; From 039a532dfe03d24b0cbd0ead269fcfe63611f479 Mon Sep 17 00:00:00 2001 From: novakovicdj Date: Wed, 12 Feb 2025 13:49:29 +0200 Subject: [PATCH 5/6] fix typos --- src/include/miopen/inline_vector.hpp | 19 +++++++++++++------ src/ocl/dropoutocl.cpp | 8 ++++---- test/lstm_common.hpp | 2 +- test/rnn_vanilla_common.hpp | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/include/miopen/inline_vector.hpp b/src/include/miopen/inline_vector.hpp index 4d249a5e53..b0c9fe3bfc 100644 --- a/src/include/miopen/inline_vector.hpp +++ b/src/include/miopen/inline_vector.hpp @@ -331,14 +331,21 @@ class InlineVector template std::ostream& operator<<(std::ostream& os, const InlineVector& iv) { - // TODO: check if this function is correct - os << "{"; - for(int i = 0; i < iv.size() - 1; i++) + if(iv.empty()) { - os << iv[i] << ", "; + os << "{}"; } - os << iv.back(); - os << "}"; + else + { + os << "{"; + for(int i = 0; i < iv.size() - 1; i++) + { + os << iv[i] << ", "; + } + os << iv.back(); + os << "}"; + } + return os; } diff --git a/src/ocl/dropoutocl.cpp b/src/ocl/dropoutocl.cpp index d63b45e80b..bf571093a1 100644 --- a/src/ocl/dropoutocl.cpp +++ b/src/ocl/dropoutocl.cpp @@ -240,10 +240,10 @@ void DropoutDescriptor::Dropout(const Handle& handle, } // support up to 5D tensor - 
miopen::InlineVector in_len{1, 1, 1, 1, 1}; - miopen::InlineVector in_str{1, 1, 1, 1, 1}; - miopen::InlineVector out_len{1, 1, 1, 1, 1}; - miopen::InlineVector out_str{1, 1, 1, 1, 1}; + miopen::InlineVector in_len(5, 1); + miopen::InlineVector in_str(5, 1); + miopen::InlineVector out_len(5, 1); + miopen::InlineVector out_str(5, 1); SquashPairedTensor(xDesc.GetLengths(), xDesc.GetStrides(), diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index a30f460927..e829eed71b 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -1459,7 +1459,7 @@ std::vector verify_backward_weights_lstm::gpu() const auto dweights_dev = handle.Write(dweights); miopen::TensorDescriptor weightDesc(miopen::deref(rnnDesc).dataType, {weightSize}); - miopen::InlineVector hlens(1, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * (dirMode != 0 ? 2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; diff --git a/test/rnn_vanilla_common.hpp b/test/rnn_vanilla_common.hpp index 41eecb76c8..7454eb7323 100644 --- a/test/rnn_vanilla_common.hpp +++ b/test/rnn_vanilla_common.hpp @@ -2017,7 +2017,7 @@ struct verify_backward_data_rnn auto weights_dev = handle.Write(weights); // auto hx_dev = handle.Write(initHidden); - miopen::InlineVector hlens(1, 0); + miopen::InlineVector hlens(3, 0); hlens[0] = nLayers * ((dirMode != 0) ? 
2 : 1); hlens[1] = batch_seq[0]; hlens[2] = hiddenSize; From d3c8f37b4c48c6fbec9f66a3e43f99f749cb90f2 Mon Sep 17 00:00:00 2001 From: novakovicdj Date: Thu, 13 Feb 2025 11:46:17 +0200 Subject: [PATCH 6/6] additional changes from vector to InlineVector for drivers --- driver/CBAInferFusion_driver.hpp | 26 ++++++++--------- driver/InputFlags.cpp | 8 +++--- driver/InputFlags.hpp | 9 +++--- driver/activ_driver.hpp | 8 +++--- driver/adam_driver.hpp | 8 +++--- driver/addlayernorm_driver.hpp | 4 +-- driver/cat_driver.hpp | 6 ++-- driver/conv_driver.hpp | 40 +++++++++++++------------- driver/ctc_verify.hpp | 12 ++++---- driver/dropout_driver.hpp | 10 ++++--- driver/dropout_gpu_emulator.hpp | 32 ++++++++++----------- driver/getitem_driver.hpp | 6 ++-- driver/glu_driver.hpp | 4 +-- driver/groupnorm_driver.hpp | 16 +++++------ driver/kthvalue_driver.hpp | 9 +++--- driver/layernorm_driver.hpp | 4 +-- driver/lrn_driver.hpp | 8 +++--- driver/miopen_Reduction.hpp | 8 +++--- driver/multimarginloss_driver.hpp | 20 ++++++------- driver/pool_driver.hpp | 6 ++-- driver/prelu_driver.hpp | 2 +- driver/reduce_driver.hpp | 10 +++---- driver/reducecalculation_driver.hpp | 2 +- driver/reduceextreme_driver.hpp | 4 +-- driver/rnn_seq_driver.hpp | 2 +- driver/rope_driver.hpp | 2 +- driver/softmarginloss_driver.hpp | 8 +++--- driver/softmax_driver.hpp | 8 +++--- driver/t5layernorm_driver.hpp | 4 +-- driver/tensorop_driver.hpp | 6 ++-- driver/transformers_adam_w_driver.hpp | 9 +++--- src/include/miopen/inline_vector.hpp | 21 +++++++++++++- test/gtest/inline_vector_basic_ops.cpp | 33 +++++++++++++++++++++ 33 files changed, 207 insertions(+), 148 deletions(-) diff --git a/driver/CBAInferFusion_driver.hpp b/driver/CBAInferFusion_driver.hpp index 7c62a7bc91..2462a7751b 100644 --- a/driver/CBAInferFusion_driver.hpp +++ b/driver/CBAInferFusion_driver.hpp @@ -106,9 +106,9 @@ class CBAInferFusionDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; 
- std::vector GetInputTensorLengthsFromCmdLine(); - std::vector GetOutputTensorLengths(); - std::vector GetWeightTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetOutputTensorLengths(); + miopen::InlineVector GetWeightTensorLengthsFromCmdLine(); std::vector GetModeFromCmdLine(); int SetActivationDescriptorFromCmdLineArgs(); @@ -309,14 +309,14 @@ int CBAInferFusionDriver::SetActivationDescriptorFromCmdLineArgs() } template -std::vector CBAInferFusionDriver::GetWeightTensorLengthsFromCmdLine() +miopen::InlineVector CBAInferFusionDriver::GetWeightTensorLengthsFromCmdLine() { int wei_n = inflags.GetValueInt("out_channels"); int wei_c = inflags.GetValueInt("in_channels"); int wei_h = inflags.GetValueInt("fil_h"); int wei_w = inflags.GetValueInt("fil_w"); - return std::vector({wei_n, wei_c, wei_h, wei_w}); + return miopen::InlineVector({wei_n, wei_c, wei_h, wei_w}); } template @@ -327,8 +327,8 @@ int CBAInferFusionDriver::GetandSetData() SetConvDescriptorFromCmdLineArgs(); SetActivationDescriptorFromCmdLineArgs(); - std::vector in_len = GetInputTensorLengthsFromCmdLine(); - std::vector wei_len = GetWeightTensorLengthsFromCmdLine(); + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector wei_len = GetWeightTensorLengthsFromCmdLine(); SetTensor4d(inputTensor, in_len, data_type); @@ -336,7 +336,7 @@ int CBAInferFusionDriver::GetandSetData() SetTensor4d(weightTensor, wei_len, data_type); - std::vector out_len{}; + miopen::InlineVector out_len{}; if(fusion_mode != miopen_fusion_na) { out_len = GetOutputTensorLengths(); @@ -349,7 +349,7 @@ int CBAInferFusionDriver::GetandSetData() if(bias_mode) { - std::vector b_len{1, out_len[1], 1, 1}; + miopen::InlineVector b_len{1, out_len[1], 1, 1}; SetTensor4d(biasTensor, b_len, data_type); } @@ -419,13 +419,13 @@ int CBAInferFusionDriver::AddCmdLineArgs() } template -std::vector CBAInferFusionDriver::GetInputTensorLengthsFromCmdLine() 
+miopen::InlineVector CBAInferFusionDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); int in_h = inflags.GetValueInt("in_h"); int in_w = inflags.GetValueInt("in_w"); - return std::vector({in_n, in_c, in_h, in_w}); + return miopen::InlineVector({in_n, in_c, in_h, in_w}); } template @@ -502,11 +502,11 @@ int CBAInferFusionDriver::SetConvDescriptorFromCmdLineArgs() } template -std::vector CBAInferFusionDriver::GetOutputTensorLengths() +miopen::InlineVector CBAInferFusionDriver::GetOutputTensorLengths() { int n, c, h, w; miopenGetConvolutionForwardOutputDim(convDesc, inputTensor, weightTensor, &n, &c, &h, &w); - return std::vector({n, c, h, w}); + return miopen::InlineVector({n, c, h, w}); } template diff --git a/driver/InputFlags.cpp b/driver/InputFlags.cpp index 98b3d85b6a..5fbe02452b 100644 --- a/driver/InputFlags.cpp +++ b/driver/InputFlags.cpp @@ -258,7 +258,7 @@ TensorParameters InputFlags::GetValueTensor(const std::string& long_name) const return {}; auto parse = [](auto line) { - auto ret = std::vector{}; + auto ret = miopen::InlineVector{}; const auto strs = miopen::SplitDelim(line, 'x'); for(auto&& str : strs) { @@ -280,7 +280,7 @@ TensorParameters InputFlags::GetValueTensor(const std::string& long_name) const return {lens}; auto layout = std::string{}; - auto strides = std::vector{}; + auto strides = miopen::InlineVector{}; if(std::isdigit(components[1][0])) strides = parse(components[1]); @@ -302,7 +302,7 @@ TensorParametersUint64 InputFlags::GetValueTensorUint64(const std::string& long_ return {}; auto parse = [](auto line) { - auto ret = std::vector{}; + auto ret = miopen::InlineVector{}; const auto strs = miopen::SplitDelim(line, 'x'); for(auto&& str : strs) { @@ -324,7 +324,7 @@ TensorParametersUint64 InputFlags::GetValueTensorUint64(const std::string& long_ return {lens}; auto layout = std::string{}; - auto strides = std::vector{}; + auto strides = 
miopen::InlineVector{}; if(std::isdigit(components[1][0])) strides = parse(components[1]); diff --git a/driver/InputFlags.hpp b/driver/InputFlags.hpp index 43f7c3a206..0d2b4822a2 100644 --- a/driver/InputFlags.hpp +++ b/driver/InputFlags.hpp @@ -27,6 +27,7 @@ #define MIOPEN_INPUT_FLAGS_HPP_ #include +#include #include @@ -46,8 +47,8 @@ struct Input struct TensorParameters { - std::vector lengths = {}; - std::vector strides = {}; + miopen::InlineVector lengths = {}; + miopen::InlineVector strides = {}; std::string layout = ""; TensorParameters FillMissing(const TensorParameters& other) const @@ -65,8 +66,8 @@ struct TensorParameters struct TensorParametersUint64 { - std::vector lengths = {}; - std::vector strides = {}; + miopen::InlineVector lengths = {}; + miopen::InlineVector strides = {}; std::string layout = ""; TensorParametersUint64 FillMissing(const TensorParametersUint64& other) const diff --git a/driver/activ_driver.hpp b/driver/activ_driver.hpp index d068e769cc..96b8c1ac64 100644 --- a/driver/activ_driver.hpp +++ b/driver/activ_driver.hpp @@ -65,7 +65,7 @@ class ActivationDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int SetActivationDescriptorFromCmdLineArgs(); @@ -130,7 +130,7 @@ int ActivationDriver::ParseCmdLineArgs(int argc, char* argv[]) template int ActivationDriver::GetandSetData() { - std::vector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); SetTensor4d(inputTensor, in_len, data_type); @@ -166,14 +166,14 @@ int ActivationDriver::AddCmdLineArgs() } template -std::vector ActivationDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector ActivationDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); int in_h = 
inflags.GetValueInt("in_h"); int in_w = inflags.GetValueInt("in_w"); - return std::vector({in_n, in_c, in_h, in_w}); + return miopen::InlineVector({in_n, in_c, in_h, in_w}); } template diff --git a/driver/adam_driver.hpp b/driver/adam_driver.hpp index 0664bc59a7..ca61ba08ca 100644 --- a/driver/adam_driver.hpp +++ b/driver/adam_driver.hpp @@ -148,7 +148,7 @@ class AdamDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int AllocateBuffersAndCopy() override; @@ -264,7 +264,7 @@ int AdamDriver::GetandSetData() found_inf = inflags.GetValueInt("found_inf"); } - std::vector one_size = {1}; + miopen::InlineVector one_size = {1}; SetTensorNd(paramDesc, param_len, data_type); SetTensorNd(paramOutDesc, param_len, data_type); SetTensorNd(gradDesc, param_len, grad_type); @@ -318,9 +318,9 @@ int AdamDriver::AddCmdLineArgs() } template -std::vector AdamDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector AdamDriver::GetInputTensorLengthsFromCmdLine() { - std::vector ret; + miopen::InlineVector ret; auto tensor = inflags.GetValueTensor("dims"); if(!tensor.lengths.empty()) return tensor.lengths; diff --git a/driver/addlayernorm_driver.hpp b/driver/addlayernorm_driver.hpp index 73cdd6110d..8ca20e238f 100644 --- a/driver/addlayernorm_driver.hpp +++ b/driver/addlayernorm_driver.hpp @@ -210,13 +210,13 @@ int AddLayerNormDriver::GetandSetData() MIOPEN_THROW_IF(dim < 0 || static_cast(dim) >= in_len.size(), "normalized_dim out of range"); - std::vector inner_len; + miopen::InlineVector inner_len; if(dim == in_len.size()) inner_len = {1}; else inner_len = {in_len.begin() + dim, in_len.end()}; - std::vector outer_len; + miopen::InlineVector outer_len; if(dim == 0) outer_len = {1}; else diff --git a/driver/cat_driver.hpp b/driver/cat_driver.hpp index 3254b5f3bc..5c6328a814 100644 --- a/driver/cat_driver.hpp +++ 
b/driver/cat_driver.hpp @@ -106,7 +106,7 @@ class CatDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector> GetInputTensorLengthsFromCmdLine(); + std::vector> GetInputTensorLengthsFromCmdLine(); int AllocateBuffersAndCopy() override; @@ -203,10 +203,10 @@ int CatDriver::AddCmdLineArgs() } template -std::vector> CatDriver::GetInputTensorLengthsFromCmdLine() +std::vector> CatDriver::GetInputTensorLengthsFromCmdLine() { const int max_input_count = 8; - std::vector> ret; + std::vector> ret; std::string name = "input"; for(int i = 1; i < max_input_count; i++) { diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index 6fde50d7a2..fce74fb6b9 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -284,13 +284,13 @@ class ConvDriver : public Driver int GetandSetData() override; bool TensorsCasted() const; - std::vector GetInputTensorLengthsFromCmdLine(); - std::vector GetWeightTensorLengthsFromCmdLine(); - std::vector GetBiasTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetWeightTensorLengthsFromCmdLine(); + miopen::InlineVector GetBiasTensorLengthsFromCmdLine(); int SetConvDescriptorFromCmdLineArgs(); - std::vector GetOutputTensorLengths(); + miopen::InlineVector GetOutputTensorLengths(); int AllocateBuffersAndCopy() override; @@ -769,8 +769,8 @@ bool ConvDriver::TensorsCasted() const template int ConvDriver::GetandSetData() { - std::vector in_len = GetInputTensorLengthsFromCmdLine(); - std::vector wei_len = GetWeightTensorLengthsFromCmdLine(); + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector wei_len = GetWeightTensorLengthsFromCmdLine(); SetTensorNd(inputTensor, in_len, inflags.GetValueStr("in_layout"), data_type); if(inflags.GetValueStr("in_cast_type") != "-1") @@ -792,7 +792,7 @@ int ConvDriver::GetandSetData() if(IsInputTensorTransform()) { - std::vector 
in_len_vect4(in_len.begin(), in_len.end()), + miopen::InlineVector in_len_vect4(in_len.begin(), in_len.end()), wei_len_vect4(wei_len.begin(), wei_len.end()); in_len_vect4[1] = ((in_len[1] + 3) / 4) * 4; SetTensorNd(inputTensor_vect4, in_len_vect4, data_type); @@ -801,7 +801,7 @@ int ConvDriver::GetandSetData() } SetConvDescriptorFromCmdLineArgs(); - std::vector out_len = GetOutputTensorLengths(); + miopen::InlineVector out_len = GetOutputTensorLengths(); if(miopen::deref(inputTensor).GetLayoutEnum() == miopenTensorNCHWc4 || miopen::deref(inputTensor).GetLayoutEnum() == miopenTensorNCHWc8) { @@ -821,15 +821,15 @@ int ConvDriver::GetandSetData() if(inflags.GetValueInt("bias") != 0) { - std::vector bias_len = GetBiasTensorLengthsFromCmdLine(); + miopen::InlineVector bias_len = GetBiasTensorLengthsFromCmdLine(); SetTensorNd(biasTensor, bias_len, data_type); } if(warmup_enabled) { AutoMiopenWarmupMode warmupMode; - std::vector warmup_in_len = {1, 1, 16, 16}; // NCHW - std::vector warmup_wei_len = {1, 1, 1, 1}; // KCYX + miopen::InlineVector warmup_in_len = {1, 1, 16, 16}; // NCHW + miopen::InlineVector warmup_wei_len = {1, 1, 1, 1}; // KCYX SetTensorNd(warmupInputTensor, warmup_in_len, warmup_data_type); SetTensorNd(warmupWeightTensor, warmup_wei_len, warmup_data_type); @@ -851,7 +851,7 @@ int ConvDriver::GetandSetData() miopenSetConvolutionGroupCount(warmupConvDesc, group_count); int warmup_out_len_size = miopen::deref(warmupInputTensor).GetNumDims(); - std::vector warmup_out_len(warmup_out_len_size); + miopen::InlineVector warmup_out_len(warmup_out_len_size); miopenGetConvolutionNdForwardOutputDim(warmupConvDesc, warmupInputTensor, warmupWeightTensor, @@ -1000,9 +1000,9 @@ int ConvDriver::AddCmdLineArgs() } template -std::vector ConvDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector ConvDriver::GetInputTensorLengthsFromCmdLine() { - std::vector in_lens; + miopen::InlineVector in_lens; int spatial_dim = inflags.GetValueInt("spatial_dim"); in_lens.resize(2 
+ spatial_dim); @@ -1032,9 +1032,9 @@ std::vector ConvDriver::GetInputTensorLengthsFromCmdLine() } template -std::vector ConvDriver::GetWeightTensorLengthsFromCmdLine() +miopen::InlineVector ConvDriver::GetWeightTensorLengthsFromCmdLine() { - std::vector wei_lens; + miopen::InlineVector wei_lens; int spatial_dim = inflags.GetValueInt("spatial_dim"); wei_lens.resize(2 + spatial_dim); @@ -1086,11 +1086,11 @@ std::vector ConvDriver::GetWeightTensorLengthsFromCmdLine() } template -std::vector ConvDriver::GetBiasTensorLengthsFromCmdLine() +miopen::InlineVector ConvDriver::GetBiasTensorLengthsFromCmdLine() { int spatial_dim = inflags.GetValueInt("spatial_dim"); - std::vector bias_lens(2 + spatial_dim, 1); + miopen::InlineVector bias_lens(2 + spatial_dim, 1); bias_lens[1] = inflags.GetValueInt("out_channels"); @@ -1203,11 +1203,11 @@ int ConvDriver::SetConvDescriptorFromCmdLineArgs() } template -std::vector ConvDriver::GetOutputTensorLengths() +miopen::InlineVector ConvDriver::GetOutputTensorLengths() { int ndim = miopen::deref(inputTensor).GetNumDims(); - std::vector out_lens(ndim); + miopen::InlineVector out_lens(ndim); miopenGetConvolutionNdForwardOutputDim( convDesc, inputTensor, weightTensor, &ndim, out_lens.data()); diff --git a/driver/ctc_verify.hpp b/driver/ctc_verify.hpp index 5364625d73..3bbb418cc8 100644 --- a/driver/ctc_verify.hpp +++ b/driver/ctc_verify.hpp @@ -331,10 +331,10 @@ void ctc_softmaxlayer_gradient_log(std::vector& label, template void RunCTCLossCPUVerify(const int num_class, - std::vector probsSize, - std::vector probsStride, - std::vector gradientsSize, - std::vector gradientsStride, + miopen::InlineVector probsSize, + miopen::InlineVector probsStride, + miopen::InlineVector gradientsSize, + miopen::InlineVector gradientsStride, std::vector& probs, std::vector& labels, std::vector& labelLengths, @@ -493,8 +493,8 @@ void RunCTCLossCPUVerify(const int num_class, } template -void GetCTCLossWorkspaceSizeCPU(std::vector probsDesc, - std::vector 
gradientsDesc, +void GetCTCLossWorkspaceSizeCPU(miopen::InlineVector probsDesc, + miopen::InlineVector gradientsDesc, const int* labels, const int* labelLengths, const int* inputLengths, diff --git a/driver/dropout_driver.hpp b/driver/dropout_driver.hpp index 0215c8c64c..3f73c68361 100644 --- a/driver/dropout_driver.hpp +++ b/driver/dropout_driver.hpp @@ -70,7 +70,7 @@ class DropoutDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(std::string input_str); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(std::string input_str); int AllocateBuffersAndCopy() override; @@ -135,7 +135,8 @@ int DropoutDriver::ParseCmdLineArgs(int argc, char* argv[]) template int DropoutDriver::GetandSetData() { - std::vector in_len = GetInputTensorLengthsFromCmdLine(inflags.GetValueStr("input_dim")); + miopen::InlineVector in_len = + GetInputTensorLengthsFromCmdLine(inflags.GetValueStr("input_dim")); SetTensorNd(inputTensor, in_len, data_type); SetTensorNd(outputTensor, in_len, data_type); @@ -183,9 +184,10 @@ int DropoutDriver::AddCmdLineArgs() } template -std::vector DropoutDriver::GetInputTensorLengthsFromCmdLine(std::string input_str) +miopen::InlineVector +DropoutDriver::GetInputTensorLengthsFromCmdLine(std::string input_str) { - std::vector in_lens; + miopen::InlineVector in_lens; std::stringstream ss(input_str); int cont = 0; diff --git a/driver/dropout_gpu_emulator.hpp b/driver/dropout_gpu_emulator.hpp index d6ee776e56..1f6594ca14 100644 --- a/driver/dropout_gpu_emulator.hpp +++ b/driver/dropout_gpu_emulator.hpp @@ -68,14 +68,14 @@ static void InitKernelStateEmulator(std::vector& states, } template -inline void ExpandTensorDim(std::vector x_len, - std::vector x_str, - std::vector y_len, - std::vector y_str, - std::vector& in_len, - std::vector& in_str, - std::vector& out_len, - std::vector& out_str) +inline void ExpandTensorDim(miopen::InlineVector x_len, + 
miopen::InlineVector x_str, + miopen::InlineVector y_len, + miopen::InlineVector y_str, + miopen::InlineVector& in_len, + miopen::InlineVector& in_str, + miopen::InlineVector& out_len, + miopen::InlineVector& out_str) { auto itr_xl = x_len.end() - 1; auto itr_yl = y_len.end() - 1; @@ -151,10 +151,10 @@ void RunDropoutForwardEmulator(miopenHandle_t handle, } // support up to 5D tensor - std::vector in_len(5, 1); - std::vector in_str(5, 1); - std::vector out_len(5, 1); - std::vector out_str(5, 1); + miopen::InlineVector in_len(5, 1); + miopen::InlineVector in_str(5, 1); + miopen::InlineVector out_len(5, 1); + miopen::InlineVector out_str(5, 1); ExpandTensorDim(miopen::deref(inputTensor).GetLengths(), miopen::deref(inputTensor).GetStrides(), @@ -233,10 +233,10 @@ void RunDropoutBackwardEmulator(const miopenDropoutDescriptor_t dropoutDesc, } // support up to 5D tensor - std::vector in_len(5, 1); - std::vector in_str(5, 1); - std::vector out_len(5, 1); - std::vector out_str(5, 1); + miopen::InlineVector in_len(5, 1); + miopen::InlineVector in_str(5, 1); + miopen::InlineVector out_len(5, 1); + miopen::InlineVector out_str(5, 1); ExpandTensorDim(miopen::deref(inputTensor).GetLengths(), miopen::deref(inputTensor).GetStrides(), diff --git a/driver/getitem_driver.hpp b/driver/getitem_driver.hpp index c48c9a0520..3a4d0f45c3 100644 --- a/driver/getitem_driver.hpp +++ b/driver/getitem_driver.hpp @@ -283,7 +283,9 @@ int GetitemDriver::GetandSetData() { miopenTensorDescriptor_t indexDesc; miopenCreateTensorDescriptor(&indexDesc); - if(SetTensorNd(indexDesc, indexTensorLength, miopenInt32) != miopenStatusSuccess) + miopen::InlineVector indexTensorLength_iv(indexTensorLength.begin(), + indexTensorLength.end()); + if(SetTensorNd(indexDesc, indexTensorLength_iv, miopenInt32) != miopenStatusSuccess) MIOPEN_THROW("Error parsing indexs tensor: " + inflags.GetValueStr("indexs") + "."); indexDescs.push_back(indexDesc); } @@ -291,7 +293,7 @@ int GetitemDriver::GetandSetData() 
if(SetTensorNd(dxDesc, dxTensorParam.lengths, data_type) != miopenStatusSuccess) MIOPEN_THROW("Error parsing dinput tensor: " + inflags.GetValueStr("dinput") + "."); - std::vector error_length; + miopen::InlineVector error_length; error_length.push_back(indexCountParam); if(SetTensorNd(errorDesc, error_length, miopen_type{}) != miopenStatusSuccess) MIOPEN_THROW("Error making error tensor: " + inflags.GetValueStr("indexcount") + "."); diff --git a/driver/glu_driver.hpp b/driver/glu_driver.hpp index 38deb2d69e..79b38a2cb9 100644 --- a/driver/glu_driver.hpp +++ b/driver/glu_driver.hpp @@ -191,12 +191,12 @@ int GLUDriver::ParseCmdLineArgs(int argc, char* argv[]) template int GLUDriver::GetandSetData() { - std::vector in_len = inflags.GetValueTensor("dim_lengths").lengths; + miopen::InlineVector in_len = inflags.GetValueTensor("dim_lengths").lengths; dim = inflags.GetValueInt("dim_to_split"); SetTensorNd(inputTensor, in_len, data_type); - std::vector out_len; + miopen::InlineVector out_len; for(int i = 0; i < in_len.size(); i++) { diff --git a/driver/groupnorm_driver.hpp b/driver/groupnorm_driver.hpp index 15717aa776..c307b2f830 100644 --- a/driver/groupnorm_driver.hpp +++ b/driver/groupnorm_driver.hpp @@ -64,7 +64,7 @@ class GroupNormDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int AllocateBuffersAndCopy() override; @@ -139,9 +139,9 @@ int GroupNormDriver::GetandSetData() eps = static_cast(inflags.GetValueDouble("eps")); mode = miopenNormMode_t(inflags.GetValueInt("mode")); - std::vector in_len = GetInputTensorLengthsFromCmdLine(); - std::vector weight_bias_len = {in_len[1]}; - std::vector mean_rstd_len = {in_len[0], num_groups}; + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector weight_bias_len = {in_len[1]}; + miopen::InlineVector mean_rstd_len = 
{in_len[0], num_groups}; SetTensorNd(inputDesc, in_len, data_type); SetTensorNd(weightDesc, weight_bias_len, data_type); @@ -178,7 +178,7 @@ int GroupNormDriver::AddCmdLineArgs() } template -std::vector GroupNormDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector GroupNormDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); @@ -189,17 +189,17 @@ std::vector GroupNormDriver::GetInputTensorLengthsFromCmdLine() if((in_n != 0) && (in_c != 0) && (in_d != 0) && (in_h != 0) && (in_w != 0)) { dim_size = 5; - return std::vector({in_n, in_c, in_d, in_h, in_w}); + return miopen::InlineVector({in_n, in_c, in_d, in_h, in_w}); } else if((in_n != 0) && (in_c != 0) && (in_h != 0) && (in_w != 0)) { dim_size = 4; - return std::vector({in_n, in_c, in_h, in_w}); + return miopen::InlineVector({in_n, in_c, in_h, in_w}); } else if((in_n != 0) && (in_c != 0) && (in_w != 0)) { dim_size = 3; - return std::vector({in_n, in_c, in_w}); + return miopen::InlineVector({in_n, in_c, in_w}); } else { diff --git a/driver/kthvalue_driver.hpp b/driver/kthvalue_driver.hpp index 75f7e5b535..751dc53f73 100644 --- a/driver/kthvalue_driver.hpp +++ b/driver/kthvalue_driver.hpp @@ -102,7 +102,7 @@ class KthvalueDriver : public Driver data_type = miopen_type{}; } - std::vector ComputeStrides(std::vector input); + miopen::InlineVector ComputeStrides(miopen::InlineVector input); int AddCmdLineArgs() override; int ParseCmdLineArgs(int argc, char* argv[]) override; InputFlags& GetInputFlags() override { return inflags; } @@ -175,7 +175,7 @@ template int KthvalueDriver::GetandSetData() { auto inDims = inflags.GetValueTensor("dim-lengths").lengths; - std::vector inStride = ComputeStrides(inDims); + miopen::InlineVector inStride = ComputeStrides(inDims); auto outDims = inflags.GetValueTensor("dim-lengths").lengths; if(dim < 0) @@ -202,11 +202,12 @@ int KthvalueDriver::GetandSetData() // Equivalent to: tensor.tranpose(0, 
-1).contiguous().tranpose(0, -1) incase contiguous = False template -std::vector KthvalueDriver::ComputeStrides(std::vector inputDim) +miopen::InlineVector +KthvalueDriver::ComputeStrides(miopen::InlineVector inputDim) { if(!isContiguous) std::swap(inputDim.front(), inputDim.back()); - std::vector strides(inputDim.size()); + miopen::InlineVector strides(inputDim.size()); strides.back() = 1; for(int i = inputDim.size() - 2; i >= 0; --i) strides[i] = strides[i + 1] * inputDim[i + 1]; diff --git a/driver/layernorm_driver.hpp b/driver/layernorm_driver.hpp index cb5c787dbb..36fe53c4fc 100644 --- a/driver/layernorm_driver.hpp +++ b/driver/layernorm_driver.hpp @@ -200,13 +200,13 @@ int LayerNormDriver::GetandSetData() MIOPEN_THROW_IF(dim < 0 || static_cast(dim) >= in_len.size(), "normalized_dim out of range"); - std::vector inner_len; + miopen::InlineVector inner_len; if(dim == in_len.size()) inner_len = {1}; else inner_len = {in_len.begin() + dim, in_len.end()}; - std::vector outer_len; + miopen::InlineVector outer_len; if(dim == 0) outer_len = {1}; else diff --git a/driver/lrn_driver.hpp b/driver/lrn_driver.hpp index 679dcfda5b..c5763b4a15 100644 --- a/driver/lrn_driver.hpp +++ b/driver/lrn_driver.hpp @@ -67,7 +67,7 @@ class LRNDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int SetLRNDescriptorFromCmdLineArgs(); @@ -144,7 +144,7 @@ int LRNDriver::ParseCmdLineArgs(int argc, char* argv[]) template int LRNDriver::GetandSetData() { - std::vector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); SetTensor4d(inputTensor, in_len, data_type); SetTensor4d(dInputTensor, in_len, data_type); @@ -184,14 +184,14 @@ int LRNDriver::AddCmdLineArgs() } template -std::vector LRNDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector 
LRNDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); int in_h = inflags.GetValueInt("in_h"); int in_w = inflags.GetValueInt("in_w"); - return std::vector({in_n, in_c, in_h, in_w}); + return miopen::InlineVector({in_n, in_c, in_h, in_w}); } template diff --git a/driver/miopen_Reduction.hpp b/driver/miopen_Reduction.hpp index 973c2d2895..b655fb4b7f 100644 --- a/driver/miopen_Reduction.hpp +++ b/driver/miopen_Reduction.hpp @@ -103,10 +103,10 @@ class miopenReductionHost miopenReduceTensorIndices_t indicesOpt; miopenIndicesType_t indicesType; - std::vector inLengths; - std::vector outLengths; - std::vector inStrides; - std::vector outStrides; + miopen::InlineVector inLengths; + miopen::InlineVector outLengths; + miopen::InlineVector inStrides; + miopen::InlineVector outStrides; std::vector invariantLengths; std::vector toReduceLengths; diff --git a/driver/multimarginloss_driver.hpp b/driver/multimarginloss_driver.hpp index dab040ef3e..663d5859d2 100644 --- a/driver/multimarginloss_driver.hpp +++ b/driver/multimarginloss_driver.hpp @@ -212,33 +212,33 @@ template int MultiMarginLossDriver::GetandSetData() { // Set tensor description - std::vector in_len = inflags.GetValueTensor("dim").lengths; + miopen::InlineVector in_len = inflags.GetValueTensor("dim").lengths; size_t N = in_len[0], C = in_len[1]; if(inflags.GetValueInt("contiguous") == 1) { SetTensorNd(iDesc, in_len, data_type); - std::vector t_len = {N}; + miopen::InlineVector t_len = {N}; SetTensorNd(tDesc, t_len, miopenInt64); - std::vector w_len = {C}; + miopen::InlineVector w_len = {C}; SetTensorNd(wDesc, w_len, data_type); } else { - std::vector in_strides(in_len.size()); + miopen::InlineVector in_strides(in_len.size()); in_strides.back() = 1; for(int i = in_len.size() - 2; i >= 0; --i) in_strides[i] = in_strides[i + 1] * in_len[i + 1]; in_strides[0] *= 2; SetTensorNd(iDesc, in_len, in_strides, data_type); - std::vector t_len 
= {N}; - std::vector t_strides = {2}; + miopen::InlineVector t_strides = {2}; + miopen::InlineVector t_len = {N}; SetTensorNd(tDesc, t_len, t_strides, miopenInt64); - std::vector w_lens = {C}; - std::vector w_strides = {2}; + miopen::InlineVector w_lens = {C}; + miopen::InlineVector w_strides = {2}; SetTensorNd(wDesc, w_lens, w_strides, data_type); } @@ -261,12 +261,12 @@ int MultiMarginLossDriver::GetandSetData() { if(reduction == "none") { - std::vector o_lens = {N}; + miopen::InlineVector o_lens = {N}; SetTensorNd(oDesc, o_lens, data_type); } else { - std::vector o_lens = {1}; + miopen::InlineVector o_lens = {1}; SetTensorNd(oDesc, o_lens, data_type); } } diff --git a/driver/pool_driver.hpp b/driver/pool_driver.hpp index 9d3bebeb51..3b9778e9b8 100644 --- a/driver/pool_driver.hpp +++ b/driver/pool_driver.hpp @@ -77,7 +77,7 @@ class PoolDriver_impl : public Driver int SetPoolDescriptorFromCmdLineArgs(); - std::vector GetOutputTensorLengths(); + miopen::InlineVector GetOutputTensorLengths(); int AllocateBuffersAndCopy() override; @@ -322,9 +322,9 @@ int PoolDriver_impl::SetPoolDescriptorFromCmdLineArgs() } template -std::vector PoolDriver_impl::GetOutputTensorLengths() +miopen::InlineVector PoolDriver_impl::GetOutputTensorLengths() { - std::vector out_dim(spatial_dim + 2); + miopen::InlineVector out_dim(spatial_dim + 2); miopenGetPoolingNdForwardOutputDim(poolDesc, inputTensor, spatial_dim + 2, out_dim.data()); return out_dim; diff --git a/driver/prelu_driver.hpp b/driver/prelu_driver.hpp index e304f27465..749e91824a 100644 --- a/driver/prelu_driver.hpp +++ b/driver/prelu_driver.hpp @@ -141,7 +141,7 @@ int PReLUDriver::GetandSetData() { auto inTensorParam = inflags.GetValueTensor("input"); auto input_length = inTensorParam.lengths; - std::vector weight_length = {inflags.GetValueInt("NumParameters")}; + miopen::InlineVector weight_length = {inflags.GetValueInt("NumParameters")}; if(SetTensorNd(inputDesc, input_length, data_type) != miopenStatusSuccess) 
MIOPEN_THROW("Error parsing input tensor: " + inflags.GetValueStr("input") + "."); diff --git a/driver/reduce_driver.hpp b/driver/reduce_driver.hpp index 5a1d6b0d71..12027e2820 100644 --- a/driver/reduce_driver.hpp +++ b/driver/reduce_driver.hpp @@ -75,7 +75,7 @@ class ReduceDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); std::vector GetDimsToReduceFromCmdLine(); int SetReduceTensorDescriptorFromCmdLineArgs(); @@ -141,9 +141,9 @@ int ReduceDriver::ParseCmdLineArgs(int argc, char* argv[]) template int ReduceDriver::GetandSetData() { - std::vector inLengths = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector inLengths = GetInputTensorLengthsFromCmdLine(); std::vector toReduceDims = GetDimsToReduceFromCmdLine(); - std::vector outLengths = inLengths; + miopen::InlineVector outLengths = inLengths; std::vector invariantDims; assert(toReduceDims.size() <= inLengths.size()); @@ -221,11 +221,11 @@ int ReduceDriver::AddCmdLineArgs() } template -std::vector ReduceDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector ReduceDriver::GetInputTensorLengthsFromCmdLine() { std::string lengthsStr = inflags.GetValueStr("DimLengths"); - std::vector lengths; + miopen::InlineVector lengths; std::size_t pos = 0; std::size_t new_pos; diff --git a/driver/reducecalculation_driver.hpp b/driver/reducecalculation_driver.hpp index 2001969509..d9e13d8a60 100644 --- a/driver/reducecalculation_driver.hpp +++ b/driver/reducecalculation_driver.hpp @@ -169,7 +169,7 @@ int ReduceCalculationDriver::GetandSetData() if(SetTensorNd(inputDesc, in_len, data_type) != miopenStatusSuccess) MIOPEN_THROW("Error parsing input tensor: " + inflags.GetValueStr("input") + "."); - std::vector out_len; + miopen::InlineVector out_len; for(int i = 0; i < in_len.size(); ++i) { diff --git a/driver/reduceextreme_driver.hpp 
b/driver/reduceextreme_driver.hpp index a06f5288a1..1fd3f66f9f 100644 --- a/driver/reduceextreme_driver.hpp +++ b/driver/reduceextreme_driver.hpp @@ -59,7 +59,7 @@ int32_t mloReduceExtremeForwardRunHost(miopenTensorDescriptor_t xDesc, int32_t dim) { auto x_dims = miopen::deref(xDesc).GetLengths(); - std::vector indice_dims; + miopen::InlineVector indice_dims; if(yhost) indice_dims = miopen::deref(yDesc).GetLengths(); else @@ -194,7 +194,7 @@ int ReduceExtremeDriver::GetandSetData() dim = inflags.GetValueInt("DimToReduce"); reduceExtremeOp = static_cast(inflags.GetValueInt("ReduceExtremeOp")); - std::vector out_len; + miopen::InlineVector out_len; for(int i = 0; i < in_len.size(); ++i) { diff --git a/driver/rnn_seq_driver.hpp b/driver/rnn_seq_driver.hpp index 9ff4de9411..57ff6d872b 100644 --- a/driver/rnn_seq_driver.hpp +++ b/driver/rnn_seq_driver.hpp @@ -341,7 +341,7 @@ template int RNNSeqDriver::CheckDescriptor(miopenTensorDescriptor_t src_desc, const std::vector& src_lens) { - const std::vector lens = GetTensorLengths(src_desc); + const miopen::InlineVector lens = GetTensorLengths(src_desc); if(lens.size() != src_lens.size() || !std::equal(src_lens.begin(), src_lens.end(), lens.begin())) diff --git a/driver/rope_driver.hpp b/driver/rope_driver.hpp index ecaf55ac64..ba1f4b20eb 100644 --- a/driver/rope_driver.hpp +++ b/driver/rope_driver.hpp @@ -197,7 +197,7 @@ int RoPEDriver::GetandSetData() auto inTensorParam = inflags.GetValueTensorUint64("input"); auto in_len = inTensorParam.lengths; - std::vector rotary_dim = {in_len[1], in_len[2], in_len[3]}; + miopen::InlineVector rotary_dim = {in_len[1], in_len[2], in_len[3]}; if(SetTensorNd(x_dyDesc, in_len, data_type) != miopenStatusSuccess) MIOPEN_THROW("Error parsing input tensor: " + inflags.GetValueStr("input") + "."); diff --git a/driver/softmarginloss_driver.hpp b/driver/softmarginloss_driver.hpp index 2b79df80a3..c745c20798 100644 --- a/driver/softmarginloss_driver.hpp +++ b/driver/softmarginloss_driver.hpp @@ 
-241,10 +241,10 @@ template int SoftMarginLossDriver::GetandSetData() { // Set input tensor description - std::vector in_len = inflags.GetValueTensor("dim").lengths; + miopen::InlineVector in_len = inflags.GetValueTensor("dim").lengths; if(inflags.GetValueStr("stride") != "-1") { - std::vector in_stride = inflags.GetValueTensor("stride").lengths; + miopen::InlineVector in_stride = inflags.GetValueTensor("stride").lengths; SetTensorNd(inputDesc, in_len, in_stride, data_type); } else @@ -255,7 +255,7 @@ int SoftMarginLossDriver::GetandSetData() } else { - std::vector in_strides(in_len.size()); + miopen::InlineVector in_strides(in_len.size()); in_strides.back() = 1; for(int i = in_len.size() - 2; i >= 0; --i) in_strides[i] = in_strides[i + 1] * in_len[i + 1]; @@ -286,7 +286,7 @@ int SoftMarginLossDriver::GetandSetData() SetTensorNd(outputDesc, in_len, data_type); else { - std::vector out_lens = {1}; + miopen::InlineVector out_lens = {1}; SetTensorNd(outputDesc, out_lens, data_type); } } diff --git a/driver/softmax_driver.hpp b/driver/softmax_driver.hpp index 5d1aca386e..665b6a6a83 100644 --- a/driver/softmax_driver.hpp +++ b/driver/softmax_driver.hpp @@ -66,7 +66,7 @@ class SoftmaxDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int AllocateBuffersAndCopy() override; @@ -133,7 +133,7 @@ int SoftmaxDriver::ParseCmdLineArgs(int argc, char* argv[]) template int SoftmaxDriver::GetandSetData() { - std::vector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); SetTensor4d(inputTensor, in_len, data_type); SetTensor4d(outputTensor, in_len, data_type); @@ -175,7 +175,7 @@ int SoftmaxDriver::AddCmdLineArgs() } template -std::vector SoftmaxDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector 
SoftmaxDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); @@ -184,7 +184,7 @@ std::vector SoftmaxDriver::GetInputTensorLengthsFromCmdLine() isForward = inflags.GetValueInt("forw") == 1; - return std::vector({in_n, in_c, in_h, in_w}); + return miopen::InlineVector({in_n, in_c, in_h, in_w}); } template diff --git a/driver/t5layernorm_driver.hpp b/driver/t5layernorm_driver.hpp index 192cd5d62b..b1dab67d19 100644 --- a/driver/t5layernorm_driver.hpp +++ b/driver/t5layernorm_driver.hpp @@ -277,13 +277,13 @@ int T5LayerNormDriver::GetandSetData() auto in_len = inTensorParam.lengths; - std::vector inner_len; + miopen::InlineVector inner_len; inner_len = {in_len[in_len.size() - 1]}; MIOPEN_THROW_IF(inner_len[0] == 0, "Final dimension must be nonzero"); - std::vector outer_len; + miopen::InlineVector outer_len; outer_len = {in_len.begin(), in_len.end() - 1}; diff --git a/driver/tensorop_driver.hpp b/driver/tensorop_driver.hpp index 34a84cd3b7..7f008e3966 100644 --- a/driver/tensorop_driver.hpp +++ b/driver/tensorop_driver.hpp @@ -59,7 +59,7 @@ class TensorOpDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int SetTensorOpFromCmdLineArgs(); @@ -121,7 +121,7 @@ int TensorOpDriver::ParseCmdLineArgs(int argc, char* argv[]) template int TensorOpDriver::GetandSetData() { - std::vector in_len = GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector in_len = GetInputTensorLengthsFromCmdLine(); SetTensor4d(aTensor, in_len, data_type); SetTensor4d(bTensor, in_len, data_type); SetTensor4d(cTensor, in_len, data_type); @@ -157,7 +157,7 @@ int TensorOpDriver::AddCmdLineArgs() } template -std::vector TensorOpDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector TensorOpDriver::GetInputTensorLengthsFromCmdLine() { int in_n = 
inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); diff --git a/driver/transformers_adam_w_driver.hpp b/driver/transformers_adam_w_driver.hpp index fd3756e559..ee740d2efd 100644 --- a/driver/transformers_adam_w_driver.hpp +++ b/driver/transformers_adam_w_driver.hpp @@ -72,7 +72,7 @@ class TransformersAdamWDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); + miopen::InlineVector GetInputTensorLengthsFromCmdLine(); int AllocateBuffersAndCopy() override; @@ -179,7 +179,7 @@ int TransformersAdamWDriver::GetandSetData() found_inf = inflags.GetValueInt("found_inf"); } - std::vector one_size = {1}; + miopen::InlineVector one_size = {1}; SetTensorNd(paramDesc, param_len, data_type); SetTensorNd(paramOutDesc, param_len, data_type); SetTensorNd(gradDesc, param_len, grad_type); @@ -226,9 +226,10 @@ int TransformersAdamWDriver::AddCmdLineArgs() } template -std::vector TransformersAdamWDriver::GetInputTensorLengthsFromCmdLine() +miopen::InlineVector +TransformersAdamWDriver::GetInputTensorLengthsFromCmdLine() { - std::vector ret; + miopen::InlineVector ret; auto tensor = inflags.GetValueTensor("dims"); if(!tensor.lengths.empty()) return tensor.lengths; diff --git a/src/include/miopen/inline_vector.hpp b/src/include/miopen/inline_vector.hpp index b0c9fe3bfc..c585e92117 100644 --- a/src/include/miopen/inline_vector.hpp +++ b/src/include/miopen/inline_vector.hpp @@ -255,7 +255,6 @@ class InlineVector real_size = n; } - // Insert // Insert 'value' before 'pos' iterator insert(iterator pos, const T& value) { @@ -284,6 +283,26 @@ return iterator(data() + idx); } + // Erase the element at position 'pos' + iterator erase(iterator pos) + { + if(empty()) + { + return iterator(data()); + } + int idx = std::distance(begin(), pos); + if(idx < 0 || idx >= real_size) + { + MIOPEN_THROW("Cannot erase data at this position"); + } + real_size -= 1; + 
for(int i = idx; i < real_size; i++) + { + storage[i] = storage[i + 1]; + } + return iterator(data() + idx); + } + // Add element to the back void push_back(const T& e) { diff --git a/test/gtest/inline_vector_basic_ops.cpp b/test/gtest/inline_vector_basic_ops.cpp index 0527ff2f85..6de2fbc9aa 100644 --- a/test/gtest/inline_vector_basic_ops.cpp +++ b/test/gtest/inline_vector_basic_ops.cpp @@ -240,3 +240,36 @@ TEST(CPU_InlineVectorInsert_NONE, Test) EXPECT_EQ(iv13_4[i], v13_4[i]); } } + +TEST(CPU_InlineVectorErase_NONE, Test) +{ + miopen::InlineVector iv14_1{1, 2, 3}; + std::vector v14_1{1, 2, 3}; + iv14_1.erase(iv14_1.begin()); + v14_1.erase(v14_1.begin()); + for(int i = 0; i < iv14_1.size(); i++) + { + EXPECT_EQ(iv14_1[i], v14_1[i]); + } + + miopen::InlineVector iv14_2{1, 2, 3}; + EXPECT_ANY_THROW(iv14_2.erase(iv14_2.end())); + + miopen::InlineVector iv14_3{1, 2, 3, 4}; + std::vector v14_3{1, 2, 3, 4}; + iv14_3.erase(iv14_3.begin() + 2); + v14_3.erase(v14_3.begin() + 2); + for(int i = 0; i < iv14_3.size(); i++) + { + EXPECT_EQ(iv14_3[i], v14_3[i]); + } + + miopen::InlineVector iv14_4{1, 2, 3}; + std::vector v14_4{1, 2, 3}; + iv14_4.erase(std::prev(iv14_4.end())); + v14_4.erase(std::prev(v14_4.end())); + for(int i = 0; i < iv14_4.size(); i++) + { + EXPECT_EQ(iv14_4[i], v14_4[i]); + } +}