Remove ttnn::Shape from tt-train #17053

Merged: 4 commits, Jan 23, 2025
Changes from all commits
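
In summary, this PR removes the remaining uses of ttnn::Shape from tt-train: shape-typed arguments switch to ttnn::SimpleShape and tensor shape queries switch from get_shape() to get_logical_shape(), so call sites no longer unwrap the logical shape themselves. A minimal before/after sketch of the pattern (tensor is a placeholder for any tt::tt_metal::Tensor):

// Before: query a ttnn::Shape and unwrap its logical part explicitly.
const auto old_shape = tensor.get_shape();                 // ttnn::Shape
const size_t old_volume = old_shape.logical_shape().volume();

// After: query the logical shape directly.
const auto new_shape = tensor.get_logical_shape();         // ttnn::SimpleShape
const size_t new_volume = new_shape.volume();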
2 changes: 1 addition & 1 deletion tt-train/sources/examples/linear_regression_ddp/main.cpp
@@ -86,7 +86,7 @@ int main() {
optimizer.zero_grad();
auto output = (*model)(data);
auto loss = ttml::ops::mse_loss(output, targets);
fmt::print("Loss shape: {}\n", loss->get_value().get_shape());
fmt::print("Loss shape: {}\n", loss->get_value().get_logical_shape());
auto mesh_shape = device->shape();
ttml::core::MeshToXTensorVariant<float> identity_composer =
ttml::core::VectorMeshToXTensor<float>(mesh_shape);
6 changes: 3 additions & 3 deletions tt-train/sources/examples/nano_gpt/main.cpp
@@ -538,8 +538,8 @@ int main(int argc, char **argv) {

auto data_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<uint32_t, DataType::UINT32>(
data, ttml::core::create_shape({batch_size, 1, 1, sequence_length}), device, Layout::ROW_MAJOR));
auto targets_tensor = ttml::autograd::create_tensor(
ttml::core::from_vector<int32_t, DataType::INT32>(targets, {batch_size * sequence_length}, device));
auto targets_tensor = ttml::autograd::create_tensor(ttml::core::from_vector<int32_t, DataType::INT32>(
targets, ttnn::SimpleShape({batch_size * sequence_length}), device));
return {data_tensor, targets_tensor};
};

@@ -632,7 +632,7 @@ int main(int argc, char **argv) {
loss->backward();
ttml::autograd::ctx().reset_graph();

auto samples = features->get_value().get_shape()[0];
auto samples = features->get_value().get_logical_shape()[0];
gradient_accumulator_helper.update(loss_float, samples);

// synchronize gradients for multi-device case, no-op if single device
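
The two call sites above differ for a reason: ttml::core::create_shape takes a fixed std::array<uint32_t, 4>, so the 4-D data shape keeps using the helper, while the 1-D targets shape is now built as a ttnn::SimpleShape directly, since from_vector accepts it without any conversion. A rough sketch of the resulting pattern (names reuse the variables above):

// 4-D shape via the helper, which now returns ttnn::SimpleShape.
auto data_shape = ttml::core::create_shape({batch_size, 1, 1, sequence_length});

// Non-4-D shapes are constructed as ttnn::SimpleShape inline and passed straight through.
auto targets_shape = ttnn::SimpleShape({batch_size * sequence_length});
auto targets_tensor = ttml::autograd::create_tensor(
    ttml::core::from_vector<int32_t, DataType::INT32>(targets, targets_shape, device));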
2 changes: 1 addition & 1 deletion tt-train/sources/examples/sample_app/main.cpp
@@ -14,7 +14,7 @@ void print_tensor(const tt::tt_metal::Tensor& tensor) {
// but we are using TILE layout. The printed format WILL NOT be correct. But good enough for a demo

// Get the shape of the tensor
auto shape = tensor.get_shape();
auto shape = tensor.get_logical_shape();
// compute the size of the tensor
size_t size = 1;
for (size_t i = 0; i < shape.size(); i++) size *= shape[i];
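
Assuming the get_logical_shape() call above returns the same ttnn::SimpleShape type used elsewhere in this PR, the hand-rolled size loop could likely be replaced by the shape's own volume() accessor; a small sketch:

// Same result as multiplying shape[i] over every dimension.
auto shape = tensor.get_logical_shape();
size_t size = shape.volume();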
20 changes: 10 additions & 10 deletions tt-train/sources/ttml/autograd/tensor.cpp
@@ -34,18 +34,18 @@ Tensor::Tensor(const tt::tt_metal::Tensor& value, bool requires_grad) : m_value(

void Tensor::add_grad(const tt::tt_metal::Tensor& grad) {
if (!is_grad_initialized()) {
auto value_shape = m_value.get_tensor().get_shape();
if (grad.get_shape() != value_shape) {
throw std::logic_error(
fmt::format("Shapes of gradients are not equal. Expected: {}, got: {}", value_shape, grad.get_shape()));
auto value_shape = m_value.get_tensor().get_logical_shape();
if (grad.get_logical_shape() != value_shape) {
throw std::logic_error(fmt::format(
"Shapes of gradients are not equal. Expected: {}, got: {}", value_shape, grad.get_logical_shape()));
}

m_grad = grad;
return;
}

const auto& grad_shape = grad.get_shape();
const auto& m_grad_shape = m_grad.get_shape();
const auto& grad_shape = grad.get_logical_shape();
const auto& m_grad_shape = m_grad.get_logical_shape();
if (grad_shape != m_grad_shape) {
throw std::logic_error(
fmt::format("Shapes of gradients are not equal. Expected: {}, got: {}", m_grad_shape, grad_shape));
@@ -110,13 +110,13 @@ void Tensor::set_value(const tt::tt_metal::Tensor& value) {

void Tensor::set_grad(const tt::tt_metal::Tensor& grad) {
if (core::is_tensor_initialized(grad)) {
auto grad_shape = grad.get_shape();
auto value_shape = m_value.get_tensor().get_shape();
auto grad_shape = grad.get_logical_shape();
auto value_shape = m_value.get_tensor().get_logical_shape();
if (grad_shape != value_shape) {
throw std::logic_error(fmt::format(
"Shapes of gradients are not equal. Expected: {}, got: {}",
m_value.get_tensor().get_shape(),
grad.get_shape()));
m_value.get_tensor().get_logical_shape(),
grad.get_logical_shape()));
}
}
m_grad = grad;
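
As a usage note, add_grad and set_grad now validate logical shapes and throw std::logic_error on mismatch. A minimal sketch of what a caller would observe, assuming the core factory helpers from this PR (the shapes are illustrative):

auto* device = &ttml::autograd::ctx().get_device();
auto tensor = ttml::autograd::create_tensor(
    ttml::core::zeros(ttml::core::create_shape({1, 1, 32, 32}), device));

// A gradient whose logical shape differs from the value's shape is rejected.
auto bad_grad = ttml::core::zeros(ttml::core::create_shape({1, 1, 32, 64}), device);
try {
    tensor->add_grad(bad_grad);
} catch (const std::logic_error& e) {
    // "Shapes of gradients are not equal. Expected: ..., got: ..."
}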
60 changes: 33 additions & 27 deletions tt-train/sources/ttml/core/tt_tensor_utils.cpp
@@ -31,7 +31,7 @@ T get_median(std::vector<T>& vec) {

template <typename T>
void print_tensor_stats_(const tt::tt_metal::Tensor& tensor, const std::string& name) {
auto tensor_shape = tensor.get_shape();
auto tensor_shape = tensor.get_logical_shape();
auto tensor_vec = tensor.to_vector<T>();

auto median = get_median(tensor_vec);
@@ -84,10 +84,13 @@ tt::tt_metal::OwnedBuffer create_owned_buffer_from_vector_of_floats(

template <typename T>
tt::tt_metal::Tensor ttml_create_owned_tensor(
std::vector<T>&& data, const ttnn::Shape& shape, tt::tt_metal::DataType data_type, tt::tt_metal::Layout layout) {
std::vector<T>&& data,
const ttnn::SimpleShape& shape,
tt::tt_metal::DataType data_type,
tt::tt_metal::Layout layout) {
auto buffer = tt::tt_metal::owned_buffer::create(std::move(data));
auto storage = OwnedStorage{std::move(buffer)};
return {std::move(storage), shape.logical_shape(), data_type, layout};
return {std::move(storage), shape, data_type, layout};
}

} // namespace
@@ -102,21 +105,21 @@ tt::tt_metal::Tensor ones_like(const tt::tt_metal::Tensor& tensor) {
}

tt::tt_metal::Tensor empty(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config) {
return ttnn::empty(shape.logical_shape(), DataType::BFLOAT16, Layout::TILE, device, memory_config);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config) {
return ttnn::empty(shape, DataType::BFLOAT16, Layout::TILE, device, memory_config);
}

tt::tt_metal::Tensor full(
const ttnn::Shape& shape, float value, ttnn::distributed::MeshDevice* device, DataType dtype) {
return ttnn::full(shape.logical_shape(), value, dtype, Layout::TILE, std::ref(*device));
const ttnn::SimpleShape& shape, float value, ttnn::distributed::MeshDevice* device, DataType dtype) {
return ttnn::full(shape, value, dtype, Layout::TILE, std::ref(*device));
}

tt::tt_metal::Tensor zeros(const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape.logical_shape(), 0.F, device, dtype);
tt::tt_metal::Tensor zeros(const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape, 0.F, device, dtype);
}

tt::tt_metal::Tensor ones(const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape.logical_shape(), 1.F, device, dtype);
tt::tt_metal::Tensor ones(const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype) {
return core::full(shape, 1.F, device, dtype);
}

template <class T, DataType TensorType>
@@ -171,24 +174,26 @@ template tt::tt_metal::Tensor from_xtensors_to_host<int32_t, tt::tt_metal::DataT

template <>
tt::tt_metal::Tensor from_vector<float, DataType::BFLOAT16>(
const std::vector<float>& buffer, const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, Layout layout) {
const std::vector<float>& buffer,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
assert(device != nullptr);
const DataType data_type = DataType::BFLOAT16;
MemoryConfig output_mem_config{};
auto logical_shape = shape.logical_shape();
size_t volume = logical_shape.volume();
size_t volume = shape.volume();
if (buffer.size() != volume) {
throw std::logic_error(
fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
}
auto owned_buffer = create_owned_buffer_from_vector_of_floats(buffer, data_type);
// remove possible paddings from the shape (it conflicts with ROW MAJOR)
auto output = tt::tt_metal::Tensor(OwnedStorage{owned_buffer}, logical_shape, data_type, Layout::ROW_MAJOR);
auto output = tt::tt_metal::Tensor(OwnedStorage{owned_buffer}, shape, data_type, Layout::ROW_MAJOR);

const size_t MAX_TILE_DIMENSION = 16384;
// Temporary workaround for the issue with tilize for large size
// https://github.com/tenstorrent/tt-metal/issues/15950
if (logical_shape[-1] >= MAX_TILE_DIMENSION && layout == Layout::TILE) {
if (shape[-1] >= MAX_TILE_DIMENSION && layout == Layout::TILE) {
output = ttnn::to_layout(output, Layout::TILE, std::nullopt, output_mem_config, device);
output = ttnn::to_device(output, device, output_mem_config);
} else {
@@ -205,7 +210,10 @@ tt::tt_metal::Tensor from_vector<float, DataType::BFLOAT16>(
// it is expected that tilize will be fixed in the after next tt-metal main update
template <>
tt::tt_metal::Tensor from_vector<float, DataType::FLOAT32>(
const std::vector<float>& buffer, const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, Layout layout) {
const std::vector<float>& buffer,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
auto tensor = from_vector<float, DataType::BFLOAT16>(buffer, shape, device, layout);
return ttnn::typecast(tensor, DataType::FLOAT32);
}
@@ -216,20 +224,19 @@ From vector uint32 doesn't support tilize_with_zero_padding on device
template <>
tt::tt_metal::Tensor from_vector<uint32_t, DataType::UINT32>(
const std::vector<uint32_t>& buffer,
const ttnn::Shape& shape,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
MemoryConfig output_mem_config{};
auto logical_shape = shape.logical_shape();
auto volume = logical_shape.volume();
auto volume = shape.volume();
if (buffer.size() != volume) {
throw std::logic_error(
fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
}

// remove possible paddings from the shape (it conflicts with ROW MAJOR)
std::vector<uint32_t> buffer_copy = buffer;
auto output = ttml_create_owned_tensor(std::move(buffer_copy), logical_shape, DataType::UINT32, Layout::ROW_MAJOR);
auto output = ttml_create_owned_tensor(std::move(buffer_copy), shape, DataType::UINT32, Layout::ROW_MAJOR);
if (device != nullptr) {
if (layout != Layout::ROW_MAJOR) {
output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
@@ -246,20 +253,19 @@ From vector int32 doesn't support tilize_with_zero_padding on device
template <>
tt::tt_metal::Tensor from_vector<int32_t, DataType::INT32>(
const std::vector<int32_t>& buffer,
const ttnn::Shape& shape,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout) {
MemoryConfig output_mem_config{};
auto logical_shape = shape.logical_shape();
auto volume = logical_shape.volume();
auto volume = shape.volume();
if (buffer.size() != volume) {
throw std::logic_error(
fmt::format("Current buffer size is {} different from shape volume {}", buffer.size(), volume));
}

// remove possible paddings from the shape (it conflicts with ROW MAJOR)
std::vector<int32_t> buffer_copy = buffer;
auto output = ttml_create_owned_tensor(std::move(buffer_copy), logical_shape, DataType::INT32, Layout::ROW_MAJOR);
auto output = ttml_create_owned_tensor(std::move(buffer_copy), shape, DataType::INT32, Layout::ROW_MAJOR);
if (device != nullptr) {
if (layout != Layout::ROW_MAJOR) {
output = ttnn::to_layout(output, layout, std::nullopt, output_mem_config, device);
@@ -274,8 +280,8 @@ bool is_tensor_initialized(const tt::tt_metal::Tensor& tensor) {
return tensor.tensor_attributes != nullptr;
}

ttnn::Shape create_shape(const std::array<uint32_t, 4>& args) {
return ttnn::Shape{args};
ttnn::SimpleShape create_shape(const std::array<uint32_t, 4>& args) {
return ttnn::SimpleShape{args};
}

void print_tensor_stats(const tt::tt_metal::Tensor& tensor, const std::string& name) {
17 changes: 10 additions & 7 deletions tt-train/sources/ttml/core/tt_tensor_utils.hpp
@@ -18,18 +18,21 @@ tt::tt_metal::Tensor zeros_like(const tt::tt_metal::Tensor& tensor);
tt::tt_metal::Tensor ones_like(const tt::tt_metal::Tensor& tensor);

tt::tt_metal::Tensor empty(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, const MemoryConfig& memory_config);
tt::tt_metal::Tensor full(
const ttnn::Shape& shape, float value, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
const ttnn::SimpleShape& shape,
float value,
ttnn::distributed::MeshDevice* device,
DataType dtype = DataType::BFLOAT16);
tt::tt_metal::Tensor zeros(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
tt::tt_metal::Tensor ones(
const ttnn::Shape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);
const ttnn::SimpleShape& shape, ttnn::distributed::MeshDevice* device, DataType dtype = DataType::BFLOAT16);

template <class VectorType = float, DataType TensorType = DataType::BFLOAT16>
[[nodiscard]] tt::tt_metal::Tensor from_vector(
const std::vector<VectorType>& buffer,
const ttnn::Shape& shape,
const ttnn::SimpleShape& shape,
ttnn::distributed::MeshDevice* device,
Layout layout = Layout::TILE);

@@ -44,7 +47,7 @@ template <class T = float>

[[nodiscard]] bool is_tensor_initialized(const tt::tt_metal::Tensor& tensor);

[[nodiscard]] ttnn::Shape create_shape(const std::array<uint32_t, 4>& args);
[[nodiscard]] ttnn::SimpleShape create_shape(const std::array<uint32_t, 4>& args);

template <class T = float, DataType TensorType = DataType::BFLOAT16>
[[nodiscard]] tt::tt_metal::Tensor from_xtensor(
@@ -57,7 +60,7 @@ template <class T = float, DataType TensorType = DataType::BFLOAT16>
template <class T = float>
[[nodiscard]] xt::xarray<T> to_xtensor(const tt::tt_metal::Tensor& tensor) {
auto vec = tensor.to_vector<T>();
const auto& shape = tensor.get_shape().logical_shape();
const auto& shape = tensor.get_logical_shape();
std::vector<size_t> shape_vec(shape.cbegin(), shape.cend());
// adapt creates view of the vector, but return will copy this data anyway (by creation of xt::array)
return xt::adapt(std::move(vec), shape_vec);
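
Taken together, the header above now routes every tt-train shape argument through ttnn::SimpleShape. A rough usage sketch of the updated factories and the xtensor round trip (device acquisition and values are placeholders, not taken from this PR):

auto* device = &ttml::autograd::ctx().get_device();
auto shape = ttml::core::create_shape({2, 1, 32, 32});      // ttnn::SimpleShape

auto zero_tensor = ttml::core::zeros(shape, device);        // BFLOAT16 by default
auto full_tensor = ttml::core::full(shape, 3.5F, device);

std::vector<float> host_data(shape.volume(), 1.0F);
auto uploaded = ttml::core::from_vector(host_data, shape, device);

// Back to host as an xt::xarray, shaped by the tensor's logical shape.
auto host_array = ttml::core::to_xtensor<float>(uploaded);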
26 changes: 13 additions & 13 deletions tt-train/sources/ttml/init/tensor_initializers.cpp
@@ -11,64 +11,64 @@
#include "core/tt_tensor_utils.hpp"
#include "cpu_initializers.hpp"
namespace ttml::init {
void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, UniformRange range) {
void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, UniformRange range) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
uniform_init(vec, range);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, NormalParams params) {
void normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, NormalParams params) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
normal_init(vec, params);
t->set_value(ttml::core::from_vector(vec, shape, device));
}

void constant_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, float value) {
void constant_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, float value) {
auto* device = &autograd::ctx().get_device();
t->set_value(core::full(shape, value, device));
}

void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params) {
void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
xavier_uniform_init(vec, params);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params) {
void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
xavier_normal_init(vec, params);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_in) {
void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_in) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
kaiming_uniform_init(vec, fan_in);

t->set_value(ttml::core::from_vector(vec, shape, device));
}

void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_out) {
void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_out) {
auto* device = &autograd::ctx().get_device();
assert(device);
size_t volume = shape.logical_shape().volume();
size_t volume = shape.volume();
std::vector<float> vec(volume);
kaiming_normal_init(vec, fan_out);

14 changes: 7 additions & 7 deletions tt-train/sources/ttml/init/tensor_initializers.hpp
@@ -9,18 +9,18 @@

namespace ttml::init {

void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, UniformRange range);
void uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, UniformRange range);

void normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, NormalParams params);
void normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, NormalParams params);

void constant_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, float value);
void constant_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, float value);

void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params);
void xavier_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params);

void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, FanParams params);
void xavier_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, FanParams params);

void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_in);
void kaiming_uniform_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_in);

void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::Shape& shape, int fan_out);
void kaiming_normal_init(ttml::autograd::TensorPtr& t, const ttnn::SimpleShape& shape, int fan_out);

} // namespace ttml::init
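
A short sketch of calling these initializers after the signature change; the shape, parameter values, and tensor construction follow patterns used elsewhere in this diff and are illustrative rather than prescriptive:

auto* device = &ttml::autograd::ctx().get_device();
auto shape = ttml::core::create_shape({1, 1, 128, 256});    // ttnn::SimpleShape
auto weight = ttml::autograd::create_tensor(ttml::core::zeros(shape, device));

ttml::init::normal_init(weight, shape, {0.F, 0.02F});       // mean 0, stddev 0.02, as in gpt2.cpp
ttml::init::constant_init(weight, shape, 0.F);              // or reset to a constant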
4 changes: 2 additions & 2 deletions tt-train/sources/ttml/models/gpt2.cpp
@@ -65,9 +65,9 @@ void weights_initialization(Transformer& model) {
for (auto& [name, tensor_ptr] : params) {
const auto& tensor = tensor_ptr->get_value();
if (name.find("weight") != std::string::npos) {
init::normal_init(tensor_ptr, tensor.get_shape(), {0.F, 0.02F});
init::normal_init(tensor_ptr, tensor.get_logical_shape(), {0.F, 0.02F});
} else if (name.find("bias") != std::string::npos) {
init::constant_init(tensor_ptr, tensor.get_shape(), 0.F);
init::constant_init(tensor_ptr, tensor.get_logical_shape(), 0.F);
}
}
}