ROCm · novakovicdj · Feb 11, 2025 · Feb 11, 2025 · Feb 11, 2025 · Feb 12, 2025
@@ -106,9 +106,9 @@ class CBAInferFusionDriver : public Driver
     InputFlags& GetInputFlags() override { return inflags; }
 
     int GetandSetData() override;
-    std::vector<int> GetInputTensorLengthsFromCmdLine();
-    std::vector<int> GetOutputTensorLengths();
-    std::vector<int> GetWeightTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetOutputTensorLengths();
+    miopen::InlineVector<int, 5> GetWeightTensorLengthsFromCmdLine();
     std::vector<int> GetModeFromCmdLine();
 
     int SetActivationDescriptorFromCmdLineArgs();
@@ -309,14 +309,14 @@ int CBAInferFusionDriver<Tgpu, Tref>::SetActivationDescriptorFromCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> CBAInferFusionDriver<Tgpu, Tref>::GetWeightTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> CBAInferFusionDriver<Tgpu, Tref>::GetWeightTensorLengthsFromCmdLine()
 {
     int wei_n = inflags.GetValueInt("out_channels");
     int wei_c = inflags.GetValueInt("in_channels");
     int wei_h = inflags.GetValueInt("fil_h");
     int wei_w = inflags.GetValueInt("fil_w");
 
-    return std::vector<int>({wei_n, wei_c, wei_h, wei_w});
+    return {wei_n, wei_c, wei_h, wei_w};
 }
 
 template <typename Tgpu, typename Tref>
@@ -327,16 +327,16 @@ int CBAInferFusionDriver<Tgpu, Tref>::GetandSetData()
     SetConvDescriptorFromCmdLineArgs();
     SetActivationDescriptorFromCmdLineArgs();
 
-    std::vector<int> in_len  = GetInputTensorLengthsFromCmdLine();
-    std::vector<int> wei_len = GetWeightTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> in_len  = GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> wei_len = GetWeightTensorLengthsFromCmdLine();
 
     SetTensor4d(inputTensor, in_len, data_type);
 
     miopenCreateFusionPlan(&fusePlanDesc, miopenVerticalFusion, inputTensor);
 
     SetTensor4d(weightTensor, wei_len, data_type);
 
-    std::vector<int> out_len{};
+    miopen::InlineVector<int, 5> out_len{};
     if(fusion_mode != miopen_fusion_na)
     {
         out_len = GetOutputTensorLengths();
@@ -349,7 +349,7 @@ int CBAInferFusionDriver<Tgpu, Tref>::GetandSetData()
 
     if(bias_mode)
     {
-        std::vector<int> b_len{1, out_len[1], 1, 1};
+        miopen::InlineVector<int, 5> b_len{1, out_len[1], 1, 1};
         SetTensor4d(biasTensor, b_len, data_type);
     }
 
@@ -419,13 +419,13 @@ int CBAInferFusionDriver<Tgpu, Tref>::AddCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> CBAInferFusionDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> CBAInferFusionDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
 {
     int in_n = inflags.GetValueInt("batchsize");
     int in_c = inflags.GetValueInt("in_channels");
     int in_h = inflags.GetValueInt("in_h");
     int in_w = inflags.GetValueInt("in_w");
-    return std::vector<int>({in_n, in_c, in_h, in_w});
+    return {in_n, in_c, in_h, in_w};
 }
 
 template <typename Tgpu, typename Tref>
@@ -502,11 +502,11 @@ int CBAInferFusionDriver<Tgpu, Tref>::SetConvDescriptorFromCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> CBAInferFusionDriver<Tgpu, Tref>::GetOutputTensorLengths()
+miopen::InlineVector<int, 5> CBAInferFusionDriver<Tgpu, Tref>::GetOutputTensorLengths()
 {
     int n, c, h, w;
     miopenGetConvolutionForwardOutputDim(convDesc, inputTensor, weightTensor, &n, &c, &h, &w);
-    return std::vector<int>({n, c, h, w});
+    return {n, c, h, w};
 }
 
 template <typename Tgpu, typename Tref>

@@ -258,7 +258,7 @@ TensorParameters InputFlags::GetValueTensor(const std::string& long_name) const
         return {};
 
     auto parse = [](auto line) {
-        auto ret        = std::vector<int>{};
+        auto ret        = miopen::InlineVector<int, 5>{};
         const auto strs = miopen::SplitDelim(line, 'x');
         for(auto&& str : strs)
         {
@@ -280,7 +280,7 @@ TensorParameters InputFlags::GetValueTensor(const std::string& long_name) const
         return {lens};
 
     auto layout  = std::string{};
-    auto strides = std::vector<int>{};
+    auto strides = miopen::InlineVector<int, 5>{};
 
     if(std::isdigit(components[1][0]))
         strides = parse(components[1]);
@@ -302,7 +302,7 @@ TensorParametersUint64 InputFlags::GetValueTensorUint64(const std::string& long_
         return {};
 
     auto parse = [](auto line) {
-        auto ret        = std::vector<uint64_t>{};
+        auto ret        = miopen::InlineVector<uint64_t, 5>{};
         const auto strs = miopen::SplitDelim(line, 'x');
         for(auto&& str : strs)
         {
@@ -324,7 +324,7 @@ TensorParametersUint64 InputFlags::GetValueTensorUint64(const std::string& long_
         return {lens};
 
     auto layout  = std::string{};
-    auto strides = std::vector<uint64_t>{};
+    auto strides = miopen::InlineVector<uint64_t, 5>{};
 
     if(std::isdigit(components[1][0]))
         strides = parse(components[1]);

@@ -27,6 +27,7 @@
 #define MIOPEN_INPUT_FLAGS_HPP_
 
 #include <miopen/miopen.h>
+#include <miopen/inline_vector.hpp>
 
 #include <boost/optional.hpp>
 
@@ -46,8 +47,8 @@ struct Input
 
 struct TensorParameters
 {
-    std::vector<int> lengths = {};
-    std::vector<int> strides = {};
+    miopen::InlineVector<int, 5> lengths = {};
+    miopen::InlineVector<int, 5> strides = {};
     std::string layout       = "";
 
     TensorParameters FillMissing(const TensorParameters& other) const
@@ -65,8 +66,8 @@ struct TensorParameters
 
 struct TensorParametersUint64
 {
-    std::vector<uint64_t> lengths = {};
-    std::vector<uint64_t> strides = {};
+    miopen::InlineVector<uint64_t, 5> lengths = {};
+    miopen::InlineVector<uint64_t, 5> strides = {};
     std::string layout            = "";
 
     TensorParametersUint64 FillMissing(const TensorParametersUint64& other) const

@@ -65,7 +65,7 @@ class ActivationDriver : public Driver
     InputFlags& GetInputFlags() override { return inflags; }
 
     int GetandSetData() override;
-    std::vector<int> GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetInputTensorLengthsFromCmdLine();
 
     int SetActivationDescriptorFromCmdLineArgs();
 
@@ -130,7 +130,7 @@ int ActivationDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[])
 template <typename Tgpu, typename Tref>
 int ActivationDriver<Tgpu, Tref>::GetandSetData()
 {
-    std::vector<int> in_len = GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> in_len = GetInputTensorLengthsFromCmdLine();
 
     SetTensor4d(inputTensor, in_len, data_type);
 
@@ -166,14 +166,14 @@ int ActivationDriver<Tgpu, Tref>::AddCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> ActivationDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> ActivationDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
 {
     int in_n = inflags.GetValueInt("batchsize");
     int in_c = inflags.GetValueInt("in_channels");
     int in_h = inflags.GetValueInt("in_h");
     int in_w = inflags.GetValueInt("in_w");
 
-    return std::vector<int>({in_n, in_c, in_h, in_w});
+    return {in_n, in_c, in_h, in_w};
 }
 
 template <typename Tgpu, typename Tref>

@@ -148,7 +148,7 @@ class AdamDriver : public Driver
     InputFlags& GetInputFlags() override { return inflags; }
 
     int GetandSetData() override;
-    std::vector<int> GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetInputTensorLengthsFromCmdLine();
 
     int AllocateBuffersAndCopy() override;
 
@@ -264,7 +264,7 @@ int AdamDriver<Tgpu, Tref, Tgrad>::GetandSetData()
         found_inf  = inflags.GetValueInt("found_inf");
     }
 
-    std::vector<int> one_size = {1};
+    miopen::InlineVector<int, 5> one_size = {1};
     SetTensorNd(paramDesc, param_len, data_type);
     SetTensorNd(paramOutDesc, param_len, data_type);
     SetTensorNd(gradDesc, param_len, grad_type);
@@ -318,9 +318,9 @@ int AdamDriver<Tgpu, Tref, Tgrad>::AddCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref, typename Tgrad>
-std::vector<int> AdamDriver<Tgpu, Tref, Tgrad>::GetInputTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> AdamDriver<Tgpu, Tref, Tgrad>::GetInputTensorLengthsFromCmdLine()
 {
-    std::vector<int> ret;
+    miopen::InlineVector<int, 5> ret;
     auto tensor = inflags.GetValueTensor("dims");
     if(!tensor.lengths.empty())
         return tensor.lengths;

@@ -210,13 +210,13 @@ int AddLayerNormDriver<Tgpu, Tref>::GetandSetData()
     MIOPEN_THROW_IF(dim < 0 || static_cast<size_t>(dim) >= in_len.size(),
                     "normalized_dim out of range");
 
-    std::vector<int> inner_len;
+    miopen::InlineVector<int, 5> inner_len;
     if(dim == in_len.size())
         inner_len = {1};
     else
         inner_len = {in_len.begin() + dim, in_len.end()};
 
-    std::vector<int> outer_len;
+    miopen::InlineVector<int, 5> outer_len;
     if(dim == 0)
         outer_len = {1};
     else

@@ -106,7 +106,7 @@ class CatDriver : public Driver
     InputFlags& GetInputFlags() override { return inflags; }
 
     int GetandSetData() override;
-    std::vector<std::vector<int>> GetInputTensorLengthsFromCmdLine();
+    std::vector<miopen::InlineVector<int, 5>> GetInputTensorLengthsFromCmdLine();
 
     int AllocateBuffersAndCopy() override;
 
@@ -203,10 +203,10 @@ int CatDriver<Tgpu, Tref>::AddCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<std::vector<int>> CatDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
+std::vector<miopen::InlineVector<int, 5>> CatDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
 {
     const int max_input_count = 8;
-    std::vector<std::vector<int>> ret;
+    std::vector<miopen::InlineVector<int, 5>> ret;
     std::string name = "input";
     for(int i = 1; i < max_input_count; i++)
     {

@@ -284,13 +284,13 @@ class ConvDriver : public Driver
 
     int GetandSetData() override;
     bool TensorsCasted() const;
-    std::vector<int> GetInputTensorLengthsFromCmdLine();
-    std::vector<int> GetWeightTensorLengthsFromCmdLine();
-    std::vector<int> GetBiasTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetWeightTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> GetBiasTensorLengthsFromCmdLine();
 
     int SetConvDescriptorFromCmdLineArgs();
 
-    std::vector<int> GetOutputTensorLengths();
+    miopen::InlineVector<int, 5> GetOutputTensorLengths();
 
     int AllocateBuffersAndCopy() override;
 
@@ -769,8 +769,8 @@ bool ConvDriver<Tgpu, Tref>::TensorsCasted() const
 template <typename Tgpu, typename Tref>
 int ConvDriver<Tgpu, Tref>::GetandSetData()
 {
-    std::vector<int> in_len  = GetInputTensorLengthsFromCmdLine();
-    std::vector<int> wei_len = GetWeightTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> in_len  = GetInputTensorLengthsFromCmdLine();
+    miopen::InlineVector<int, 5> wei_len = GetWeightTensorLengthsFromCmdLine();
 
     SetTensorNd(inputTensor, in_len, inflags.GetValueStr("in_layout"), data_type);
     if(inflags.GetValueStr("in_cast_type") != "-1")
@@ -792,7 +792,7 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
 
     if(IsInputTensorTransform())
     {
-        std::vector<int> in_len_vect4(in_len.begin(), in_len.end()),
+        miopen::InlineVector<int, 5> in_len_vect4(in_len.begin(), in_len.end()),
             wei_len_vect4(wei_len.begin(), wei_len.end());
         in_len_vect4[1] = ((in_len[1] + 3) / 4) * 4;
         SetTensorNd(inputTensor_vect4, in_len_vect4, data_type);
@@ -801,7 +801,7 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
     }
     SetConvDescriptorFromCmdLineArgs();
 
-    std::vector<int> out_len = GetOutputTensorLengths();
+    miopen::InlineVector<int, 5> out_len = GetOutputTensorLengths();
     if(miopen::deref(inputTensor).GetLayoutEnum() == miopenTensorNCHWc4 ||
        miopen::deref(inputTensor).GetLayoutEnum() == miopenTensorNCHWc8)
     {
@@ -821,15 +821,15 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
 
     if(inflags.GetValueInt("bias") != 0)
     {
-        std::vector<int> bias_len = GetBiasTensorLengthsFromCmdLine();
+        miopen::InlineVector<int, 5> bias_len = GetBiasTensorLengthsFromCmdLine();
         SetTensorNd(biasTensor, bias_len, data_type);
     }
 
     if(warmup_enabled)
     {
         AutoMiopenWarmupMode warmupMode;
-        std::vector<int> warmup_in_len  = {1, 1, 16, 16}; // NCHW
-        std::vector<int> warmup_wei_len = {1, 1, 1, 1};   // KCYX
+        miopen::InlineVector<int, 5> warmup_in_len  = {1, 1, 16, 16}; // NCHW
+        miopen::InlineVector<int, 5> warmup_wei_len = {1, 1, 1, 1};   // KCYX
         SetTensorNd(warmupInputTensor, warmup_in_len, warmup_data_type);
         SetTensorNd(warmupWeightTensor, warmup_wei_len, warmup_data_type);
 
@@ -851,7 +851,7 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
         miopenSetConvolutionGroupCount(warmupConvDesc, group_count);
 
         int warmup_out_len_size = miopen::deref(warmupInputTensor).GetNumDims();
-        std::vector<int> warmup_out_len(warmup_out_len_size);
+        miopen::InlineVector<int, 5> warmup_out_len(warmup_out_len_size);
         miopenGetConvolutionNdForwardOutputDim(warmupConvDesc,
                                                warmupInputTensor,
                                                warmupWeightTensor,
@@ -1000,9 +1000,9 @@ int ConvDriver<Tgpu, Tref>::AddCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> ConvDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> ConvDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
 {
-    std::vector<int> in_lens;
+    miopen::InlineVector<int, 5> in_lens;
 
     int spatial_dim = inflags.GetValueInt("spatial_dim");
     in_lens.resize(2 + spatial_dim);
@@ -1032,9 +1032,9 @@ std::vector<int> ConvDriver<Tgpu, Tref>::GetInputTensorLengthsFromCmdLine()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> ConvDriver<Tgpu, Tref>::GetWeightTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> ConvDriver<Tgpu, Tref>::GetWeightTensorLengthsFromCmdLine()
 {
-    std::vector<int> wei_lens;
+    miopen::InlineVector<int, 5> wei_lens;
 
     int spatial_dim = inflags.GetValueInt("spatial_dim");
     wei_lens.resize(2 + spatial_dim);
@@ -1086,11 +1086,11 @@ std::vector<int> ConvDriver<Tgpu, Tref>::GetWeightTensorLengthsFromCmdLine()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> ConvDriver<Tgpu, Tref>::GetBiasTensorLengthsFromCmdLine()
+miopen::InlineVector<int, 5> ConvDriver<Tgpu, Tref>::GetBiasTensorLengthsFromCmdLine()
 {
     int spatial_dim = inflags.GetValueInt("spatial_dim");
 
-    std::vector<int> bias_lens(2 + spatial_dim, 1);
+    miopen::InlineVector<int, 5> bias_lens(2 + spatial_dim, 1);
 
     bias_lens[1] = inflags.GetValueInt("out_channels");
 
@@ -1203,11 +1203,11 @@ int ConvDriver<Tgpu, Tref>::SetConvDescriptorFromCmdLineArgs()
 }
 
 template <typename Tgpu, typename Tref>
-std::vector<int> ConvDriver<Tgpu, Tref>::GetOutputTensorLengths()
+miopen::InlineVector<int, 5> ConvDriver<Tgpu, Tref>::GetOutputTensorLengths()
 {
     int ndim = miopen::deref(inputTensor).GetNumDims();
 
-    std::vector<int> out_lens(ndim);
+    miopen::InlineVector<int, 5> out_lens(ndim);
 
     miopenGetConvolutionNdForwardOutputDim(
         convDesc, inputTensor, weightTensor, &ndim, out_lens.data());