Remove ttnn::Shape from tt-train #17053

Merged: 4 commits, Jan 23, 2025
Changes from all commits
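
In summary, this PR removes the remaining uses of ttnn::Shape from tt-train: shape-typed arguments switch to ttnn::SimpleShape and tensor shape queries switch from get_shape() to get_logical_shape(), so call sites no longer unwrap the logical shape themselves. A minimal before/after sketch of the pattern (tensor is a placeholder for any tt::tt_metal::Tensor):

// Before: query a ttnn::Shape and unwrap its logical part explicitly.
const auto old_shape = tensor.get_shape();                 // ttnn::Shape
const size_t old_volume = old_shape.logical_shape().volume();

// After: query the logical shape directly.
const auto new_shape = tensor.get_logical_shape();         // ttnn::SimpleShape
const size_t new_volume = new_shape.volume();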
2 changes: 1 addition & 1 deletion tt-train/sources/examples/linear_regression_ddp/main.cpp
@@ -86,7 +86,7 @@ int main() {
optimizer.zero_grad();
auto output = (*model)(data);
auto loss = ttml::ops::mse_loss(output, targets);
fmt::print("Loss shape: {}\n", loss->get_value().get_shape());
fmt::print("Loss shape: {}\n", loss->get_value().get_logical_shape());
auto mesh_shape = device->shape();
ttml::core::MeshToXTensorVariant<float> identity_composer =
ttml::core::VectorMeshToXTensor<float>(mesh_shape);
6 changes: 3 additions & 3 deletions tt-train/sources/examples/nano_gpt/main.cpp
@@ -538,8 +538,8 @@ int main(int argc, char **argv) {

auto data_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<uint32_t, DataType::UINT32>(
data, ttml::core::create_shape({batch_size, 1, 1, sequence_length}), device, Layout::ROW_MAJOR));
auto targets_tensor = ttml::autograd::create_tensor(
ttml::core::from_vector<int32_t, DataType::INT32>(targets, {batch_size * sequence_length}, device));
auto targets_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<int32_t, DataType::INT32>(
targets, ttnn::SimpleShape({batch_size * sequence_length}), device));
return {data_tensor, targets_tensor};
};

@@ -632,7 +632,7 @@ int main(int argc, char **argv) {
loss->backward();
ttml::autograd::ctx().reset_graph();

auto samples = features->get_value().get_shape()[0];
auto samples = features->get_value().get_logical_shape()[0];
gradient_accumulator_helper.update(loss_float, samples);

// synchronize gradients for multi-device case, no-op if single device
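
The two call sites above differ for a reason: ttml::core::create_shape takes a fixed std::array<uint32_t, 4>, so the 4-D data shape keeps using the helper, while the 1-D targets shape is now built as a ttnn::SimpleShape directly, since from_vector accepts it without any conversion. A rough sketch of the resulting pattern (names reuse the variables above):

// 4-D shape via the helper, which now returns ttnn::SimpleShape.
auto data_shape = ttml::core::create_shape({batch_size, 1, 1, sequence_length});

// Non-4-D shapes are constructed as ttnn::SimpleShape inline and passed straight through.
auto targets_shape = ttnn::SimpleShape({batch_size * sequence_length});
auto targets_tensor = ttml::autograd::create_tensor(
    ttml::core::from_vector<int32_t, DataType::INT32>(targets, targets_shape, device));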
2 changes: 1 addition & 1 deletion tt-train/sources/examples/sample_app/main.cpp
@@ -14,7 +14,7 @@ void print_tensor(const tt::tt_metal::Tensor& tensor) {
// but we are using TILE layout. The printed format WILL NOT be correct. But good enough for a demo

// Get the shape of the tensor
auto shape = tensor.get_shape();
auto shape = tensor.get_logical_shape();
// compute the size of the tensor
size_t size = 1;
for (size_t i = 0; i < shape.size(); i++) size *= shape[i];
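
Assuming the get_logical_shape() call above returns the same ttnn::SimpleShape type used elsewhere in this PR, the hand-rolled size loop could likely be replaced by the shape's own volume() accessor; a small sketch:

// Same result as multiplying shape[i] over every dimension.
auto shape = tensor.get_logical_shape();
size_t size = shape.volume();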
20 changes: 10 additions & 10 deletions tt-train/sources/ttml/autograd/tensor.cpp
@@ -34,18 +34,18 @@ Tensor::Tensor(const tt::tt_metal::Tensor& value, bool requires_grad) : m_value(

void Tensor::add_grad(const tt::tt_metal::Tensor& grad) {
if (!is_grad_initialized()) {
auto value_shape = m_value.get_tensor().get_shape();
if (grad.get_shape() != value_shape) {
throw std::logic_error(
fmt::format("Shapes of gradients are not equal. Expected: {}, got: {}", value_shape, grad.get_shape()));
auto value_shape = m_value.get_tensor().get_logical_shape();
if (grad.get_logical_shape() != value_shape) {
throw std::logic_error(fmt::format(
"Shapes of gradients are not equal. Expected: {}, got: {}", value_shape, grad.get_logical_shape()));
}

m_grad = grad;
return;
}

const auto& grad_shape = grad.get_shape();
const auto& m_grad_shape = m_grad.get_shape();
const auto& grad_shape = grad.get_logical_shape();
const auto& m_grad_shape = m_grad.get_logical_shape();
if (grad_shape != m_grad_shape) {
throw std::logic_error(
fmt::format("Shapes of gradients are not equal. Expected: {}, got: {}", m_grad_shape, grad_shape));
@@ -110,13 +110,13 @@ void Tensor::set_value(const tt::tt_metal::Tensor& value) {

void Tensor::set_grad(const tt::tt_metal::Tensor& grad) {
if (core::is_tensor_initialized(grad)) {
auto grad_shape = grad.get_shape();
auto value_shape = m_value.get_tensor().get_shape();
auto grad_shape = grad.get_logical_shape();
auto value_shape = m_value.get_tensor().get_logical_shape();
if (grad_shape != value_shape) {
throw std::logic_error(fmt::format(
"Shapes of gradients are not equal. Expected: {}, got: {}",
m_value.get_tensor().get_shape(),
grad.get_shape()));
m_value.get_tensor().get_logical_shape(),
grad.get_logical_shape()));
}
}
m_grad = grad;
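
As a usage note, add_grad and set_grad now validate logical shapes and throw std::logic_error on mismatch. A minimal sketch of what a caller would observe, assuming the core factory helpers from this PR (the shapes are illustrative):

auto* device = &ttml::autograd::ctx().get_device();
auto tensor = ttml::autograd::create_tensor(
    ttml::core::zeros(ttml::core::create_shape({1, 1, 32, 32}), device));

// A gradient whose logical shape differs from the value's shape is rejected.
auto bad_grad = ttml::core::zeros(ttml::core::create_shape({1, 1, 32, 64}), device);
try {
    tensor->add_grad(bad_grad);
} catch (const std::logic_error& e) {
    // "Shapes of gradients are not equal. Expected: ..., got: ..."
}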
60 changes: 33 additions & 27 deletions tt-train/sources/ttml/core/tt_tensor_utils.cpp
@@ -31,7 +31,7 @@ T get_median(std::vector<T>& vec) {

template <typename T>
void print_tensor_stats_(const tt::tt_metal::Tensor& tensor, const std::string& name) {
auto tensor_shape = tensor.get_shape();
auto tensor_shape = tensor.get_logical_shape();
auto tensor_vec = tensor.to_vector<T>();

auto median = get_median(tensor_vec);
@@ -84,10 +84,13 @@ tt::tt_metal::OwnedBuffer create_owned_buffer_from_vector_of_floats(

template <typename T>
tt::tt_metal::Tensor ttml_create_owned_tensor(
std::vector<T>&& data, const ttnn::Shape& shape, tt::tt_metal::DataType data_type, tt::tt_metal::Layout layout) {
std::vector<T>&& data,
const ttnn::SimpleShape& shape,
tt::tt_metal::DataType data_type,
tt::tt_metal::Layout layout) {
auto buffer = tt::tt_metal::owned_buffer::create(std::move(data));
auto storage = OwnedStorage{std::move(buffer)};
return {std::move(storage), shape.logical_shape(), data_type, layout};
return {std::move(storage), shape, data_type, layout};
}

} // namespace
@@ -102,21 +105,21 @@ tt::tt_metal::Tensor ones_like(const tt::tt_metal::Tensor& tensor) {
}

tt::tt_metal::Tensor empty(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config) {
return ttnn::empty(shape.logical_shape(), DataType::BFLOAT16, Layout::TILE, device, memory_config);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config) {
return ttnn::empty(shape, DataType::BFLOAT16, Layout::TILE, device, memory_config);
}

tt::tt_metal::Tensor full(
const ttnn::Shape& shape, float value, ttnn::distributed::MeshDevice* device, DataType dtype) {
return ttnn::full(shape.logical_shape(), value, dtype, Layout::TILE, std::ref(*device));
const ttnn::SimpleShape& shape, float value, ttnn::distributed::MeshDevice* device, DataType dtype) {
return ttnn::full(shape, value, dtype, Layout::TILE, std::ref(*device));
}

tt::tt_metal::Tensor zeros(const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape.logical_shape(), 0.F, device, dtype);
tt::tt_metal::Tensor zeros(const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape, 0.F, device, dtype);
}

tt::tt_metal::Tensor ones(const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape.logical_shape(), 1.F, device, dtype);
tt::tt_metal::Tensor ones(const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape, 1.F, device, dtype);
}

template <class T, DataType TensorType>
@@ -171,24 +174,26 @@ template tt::tt_metal::Tensor from_xtensors_to_host<int32_t, tt::tt_metal::DataT

template <>
tt::tt_metal::Tensor from_vector<float, DataType::BFLOAT16>(
const std::vector<float>& buffer, const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, Layout layout) {
const std::vector<float>& buffer,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
assert(device != nullptr);
const DataType data_type = DataType::BFLOAT16;
MemoryConfig output_mem_config{};
auto logical_shape = shape.logical_shape();
size_t volume = logical_shape.volume();
size_t volume = shape.volume();
if (buffer.size() != volume) {
throw std::logic_error(
fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
}
auto owned_buffer = create_owned_buffer_from_vector_of_floats(buffer, data_type);
// remove possible paddings from the shape (it conflicts with ROW MAJOR)
auto output = tt::tt_metal::Tensor(OwnedStorage{owned_buffer}, logical_shape, data_type, Layout::ROW_MAJOR);
auto output = tt::tt_metal::Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR);

const size_t MAX_TILE_DIMENSION = 16384;
// Temporary workaround for the issue with tilize for large size
// https://github.com/tenstorrent/tt-metal/issues/15950
if (logical_shape[-1] >= MAX_TILE_DIMENSION && layout == Layout::TILE) {
if (shape[-1] >= MAX_TILE_DIMENSION && layout == Layout::TILE) {
output = ttnn::to_layout(output, Layout::TILE, std::nullopt, output_mem_config, device);
output = ttnn::to_device(output, device, output_mem_config);
} else {
@@ -205,7 +210,10 @@ tt::tt_metal::Tensor from_vector<float, DataType::BFLOAT16>(
// it is expected that tilize will be fixed in the after next tt-metal main update
template <>
tt::tt_metal::Tensor from_vector<float, DataType::FLOAT32>(
const std::vector<float>& buffer, const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, Layout layout) {
const std::vector<float>& buffer,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
auto tensor = from_vector<float, DataType::BFLOAT16>(buffer, shape, device, layout);
return ttnn::typecast(tensor, DataType::FLOAT32);
}
@@ -216,20 +224,19 @@ From vector uint32 doesn't support tilize_with_zero_padding on device
template <>
tt::tt_metal::Tensor from_vector<uint32_t, DataType::UINT32>(
const std::vector<uint32_t>& buffer,
const ttnn::Shape& shape,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
MemoryConfig output_mem_config{};
auto logical_shape = shape.logical_shape();
auto volume = logical_shape.volume();
auto volume = shape.volume();
if (buffer.size() != volume) {
throw std::logic_error(
fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
}

// remove possible paddings from the shape (it conflicts with ROW MAJOR)
std::vector<uint32_t> buffer_copy = buffer;
auto output = ttml_create_owned_tensor(std::move(buffer_copy), logical_shape, DataType::UINT32, Layout::ROW_MAJOR);
auto output = ttml_create_owned_tensor(std::move(buffer_copy), shape, DataType::UINT32, Layout::ROW_MAJOR);
if (device != nullptr) {
if (layout != Layout::ROW_MAJOR) {
output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
@@ -246,20 +253,19 @@ From vector int32 doesn't support tilize_with_zero_padding on device
template <>
tt::tt_metal::Tensor from_vector<int32_t, DataType::INT32>(
const std::vector<int32_t>& buffer,
const ttnn::Shape& shape,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
MemoryConfig output_mem_config{};
auto logical_shape = shape.logical_shape();
auto volume = logical_shape.volume();
auto volume = shape.volume();
if (buffer.size() != volume) {
throw std::logic_error(
fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
}

// remove possible paddings from the shape (it conflicts with ROW MAJOR)
std::vector<int32_t> buffer_copy = buffer;
auto output = ttml_create_owned_tensor(std::move(buffer_copy), logical_shape, DataType::INT32, Layout::ROW_MAJOR);
auto output = ttml_create_owned_tensor(std::move(buffer_copy), shape, DataType::INT32, Layout::ROW_MAJOR);
if (device != nullptr) {
if (layout != Layout::ROW_MAJOR) {
output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
@@ -274,8 +280,8 @@ bool is_tensor_initialized(const tt::tt_metal::Tensor& tensor) {
return tensor.tensor_attributes != nullptr;
}

ttnn::Shape create_shape(const std::array<uint32_t, 4>& args) {
return ttnn::Shape{args};
ttnn::SimpleShape create_shape(const std::array<uint32_t, 4>& args) {
return ttnn::SimpleShape{args};
}

void print_tensor_stats(const tt::tt_metal::Tensor& tensor, const std::string& name) {
17 changes: 10 additions & 7 deletions tt-train/sources/ttml/core/tt_tensor_utils.hpp
@@ -18,18 +18,21 @@ tt::tt_metal::Tensor zeros_like(const tt::tt_metal::Tensor& tensor);
tt::tt_metal::Tensor ones_like(const tt::tt_metal::Tensor& tensor);

tt::tt_metal::Tensor empty(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config);
tt::tt_metal::Tensor full(
const ttnn::Shape& shape, float value, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
const ttnn::SimpleShape& shape,
float value,
ttnn::distributed::MeshDevice* device,
DataType dtype = DataType::BFLOAT16);
tt::tt_metal::Tensor zeros(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
tt::tt_metal::Tensor ones(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);

template <class VectorType = float, DataType TensorType = DataType::BFLOAT16>
[[nodiscard]] tt::tt_metal::Tensor from_vector(
const std::vector<VectorType>& buffer,
const ttnn::Shape& shape,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout = Layout::TILE);

@@ -44,7 +47,7 @@ template <class T = float>

[[nodiscard]] bool is_tensor_initialized(const tt::tt_metal::Tensor& tensor);

[[nodiscard]] ttnn::Shape create_shape(const std::array<uint32_t, 4>& args);
[[nodiscard]] ttnn::SimpleShape create_shape(const std::array<uint32_t, 4>& args);

template <class T = float, DataType TensorType = DataType::BFLOAT16>
[[nodiscard]] tt::tt_metal::Tensor from_xtensor(
@@ -57,7 +60,7 @@ template <class T = float, DataType TensorType = DataType::BFLOAT16>
template <class T = float>
[[nodiscard]] xt::xarray<T> to_xtensor(const tt::tt_metal::Tensor& tensor) {
auto vec = tensor.to_vector<T>();
const auto& shape = tensor.get_shape().logical_shape();
const auto& shape = tensor.get_logical_shape();
std::vector<size_t> shape_vec(shape.cbegin(), shape.cend());
// adapt creates view of the vector, but return will copy this data anyway (by creation of xt::array)
return xt::adapt(std::move(vec), shape_vec);
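
Taken together, the header above now routes every tt-train shape argument through ttnn::SimpleShape. A rough usage sketch of the updated factories and the xtensor round trip (device acquisition and values are placeholders, not taken from this PR):

auto* device = &ttml::autograd::ctx().get_device();
auto shape = ttml::core::create_shape({2, 1, 32, 32});      // ttnn::SimpleShape

auto zero_tensor = ttml::core::zeros(shape, device);        // BFLOAT16 by default
auto full_tensor = ttml::core::full(shape, 3.5F, device);

std::vector<float> host_data(shape.volume(), 1.0F);
auto uploaded = ttml::core::from_vector(host_data, shape, device);

// Back to host as an xt::xarray, shaped by the tensor's logical shape.
auto host_array = ttml::core::to_xtensor<float>(uploaded);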
26 changes: 13 additions & 13 deletions tt-train/sources/ttml/init/tensor_initializers.cpp
@@ -11,64 +11,64 @@
#include "core/tt_tensor_utils.hpp"
#include "cpu_initializers.hpp"
namespace ttml::init {
void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, UniformRange range) {
void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, UniformRange range) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
uniform_init(vec, range);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, NormalParams params) {
void normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, NormalParams params) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
normal_init(vec, params);
t->set_value(ttml::core::from_vector(vec, shape, device));
}

void constant_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, float value) {
void constant_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, float value) {
auto* device = &autograd::ctx().get_device();
t->set_value(core::full(shape, value, device));
}

void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params) {
void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
xavier_uniform_init(vec, params);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params) {
void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
xavier_normal_init(vec, params);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_in) {
void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_in) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
kaiming_uniform_init(vec, fan_in);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_out) {
void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_out) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
kaiming_normal_init(vec, fan_out);

14 changes: 7 additions & 7 deletions tt-train/sources/ttml/init/tensor_initializers.hpp
@@ -9,18 +9,18 @@

namespace ttml::init {

void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, UniformRange range);
void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, UniformRange range);

void normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, NormalParams params);
void normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, NormalParams params);

void constant_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, float value);
void constant_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, float value);

void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params);
void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params);

void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params);
void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params);

void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_in);
void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_in);

void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_out);
void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_out);

} // namespace ttml::init
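
A short sketch of calling these initializers after the signature change; the shape, parameter values, and tensor construction follow patterns used elsewhere in this diff and are illustrative rather than prescriptive:

auto* device = &ttml::autograd::ctx().get_device();
auto shape = ttml::core::create_shape({1, 1, 128, 256});    // ttnn::SimpleShape
auto weight = ttml::autograd::create_tensor(ttml::core::zeros(shape, device));

ttml::init::normal_init(weight, shape, {0.F, 0.02F});       // mean 0, stddev 0.02, as in gpt2.cpp
ttml::init::constant_init(weight, shape, 0.F);              // or reset to a constant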
4 changes: 2 additions & 2 deletions tt-train/sources/ttml/models/gpt2.cpp
@@ -65,9 +65,9 @@ void weights_initialization(Transformer& model) {
for (auto& [name, tensor_ptr] : params) {
const auto& tensor = tensor_ptr->get_value();
if (name.find("weight") != std::string::npos) {
init::normal_init(tensor_ptr, tensor.get_shape(), {0.F, 0.02F});
init::normal_init(tensor_ptr, tensor.get_logical_shape(), {0.F, 0.02F});
} else if (name.find("bias") != std::string::npos) {
init::constant_init(tensor_ptr, tensor.get_shape(), 0.F);
init::constant_init(tensor_ptr, tensor.get_logical_shape(), 0.F);
}
}
}