Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replacing get_shape() #16786

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this file changed?

Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0
#include "debug/dprint.h"
#include "ttnn/cpp/ttnn/deprecated/tt_dnn/kernels/compute/moreh_common.hpp"

namespace NAMESPACE {
// Compute kernel for moreh_abs_pow: per tile, computes |x| (optionally masking
// the ragged last column tile), then delegates |x|^p to power_tile_to_cb.
void MAIN {
    int i{0};
    const auto num_rows_per_core = get_arg_val<uint32_t>(i++);  // rows of tiles assigned to this core
    const auto Wt = get_arg_val<uint32_t>(i++);                 // tiles per row
    const auto origin_w = get_arg_val<uint32_t>(i++);           // un-padded width in elements
    const auto p = get_arg_val<uint32_t>(i++);                  // |floor(p)| of the exponent
    const bool p_is_negative = get_arg_val<uint32_t>(i++) == 1;

    // Input circular buffers (filled by the reader kernel).
    std::uint8_t input_id{tt::CB::c_in0};
    const auto cb_x = input_id++;        // input
    const auto cb_one = input_id++;      // one
    const auto cb_decimal = input_id++;  // decimal
    const auto cb_mask_w = input_id++;   // mask_w

    // Output circular buffer (drained by the writer kernel).
    std::uint8_t output_id{tt::CB::c_out0};
    const auto cb_y = output_id++;  // output

    // Intermediate circular buffers.
    std::uint8_t intermed_id{tt::CB::c_intermed0};
    const auto cb_tmp0 = intermed_id++;
    const auto cb_tmp1 = intermed_id++;
    const auto cb_tmp2 = intermed_id++;
    const auto cb_tmp3 = intermed_id++;

    const auto cb_xabs = cb_tmp0;      // |x|
    const auto cb_xpow = cb_tmp1;      // |x|^p
    const auto cb_logx = cb_tmp2;      // log(|x|)
    const auto cb_exp_lxmd = cb_tmp3;  // exp(log(|x|) * decimal)

    constexpr uint32_t onetile = 1;
    constexpr uint32_t dst0 = 0;
    constexpr uint32_t dst1 = 1;

    binary_op_init_common(tt::CB::c_in0, tt::CB::c_in0);

    cb_wait_front(cb_one, onetile);      // comes from the reader
    cb_wait_front(cb_decimal, onetile);  // comes from the reader

    // Mask only applies when the logical width is not a tile-width multiple;
    // the mask tile itself is produced by the reader kernel.
    constexpr uint32_t TILE_W = 32;
    const bool do_mask_w = (origin_w % TILE_W) != 0;

    if (do_mask_w) {
        cb_wait_front(cb_mask_w, onetile);  // comes from the reader
    }
    for (uint32_t row_idx = 0; row_idx < num_rows_per_core; ++row_idx) {
        for (uint32_t col_idx = 0; col_idx < Wt; ++col_idx) {
            // |x|
            tile_regs_acquire();
            cb_wait_front(cb_x, onetile);  // comes from the reader
            cb_reserve_back(cb_xabs, onetile);

            copy_tile_init_with_dt(cb_x);
            copy_tile(cb_x, 0, dst0);

            // Zero out padding lanes in the last tile of each row so they do
            // not contribute garbage to the power computation.
            if (do_mask_w && (col_idx == Wt - 1)) {
                copy_tile_init_with_dt(cb_mask_w);
                copy_tile(cb_mask_w, 0, dst1);

                mask_tile_init();
                mask_tile(dst0, dst1);
            }

            abs_tile_init();
            abs_tile(dst0);
            tile_regs_commit();

            tile_regs_wait();
            pack_tile_with_dt(dst0, cb_xabs);
            tile_regs_release();

            cb_pop_front(cb_x, onetile);
            cb_push_back(cb_xabs, onetile);

            // |x|^p -> cb_y, using the decimal part via exp(log(|x|) * decimal).
            power_tile_to_cb(cb_xabs, cb_xpow, cb_logx, cb_decimal, cb_exp_lxmd, cb_y, p, p_is_negative);
        }
    }

    cb_pop_front(cb_one, onetile);
    cb_pop_front(cb_decimal, onetile);
    if (do_mask_w) {
        cb_pop_front(cb_mask_w, onetile);
    }
}  // void MAIN
}  // namespace NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "ttnn/cpp/ttnn/deprecated/tt_dnn/kernels/dataflow/moreh_common.hpp"

// Reader kernel for moreh_abs_pow: fills scalar CBs (one, decimal, optional
// width mask) and streams input tiles from DRAM/L1 into the input CB.
void kernel_main() {
    int i{0};
    const auto input_addr = get_arg_val<uint32_t>(i++);
    const bool input_is_dram = get_arg_val<uint32_t>(i++) == 1;
    const auto decimal = get_arg_val<uint32_t>(i++);  // bit pattern of the fractional exponent
    const auto num_rows_per_core = get_arg_val<uint32_t>(i++);
    const auto Wt = get_arg_val<uint32_t>(i++);
    const auto tile_offset = get_arg_val<uint32_t>(i++);
    const auto origin_w = get_arg_val<uint32_t>(i++);

    uint32_t cb_id{0};
    const auto cb_id_input = cb_id++;
    const auto cb_id_one = cb_id++;
    const auto cb_id_decimal = cb_id++;
    const auto cb_id_mask_w = cb_id++;

    const uint32_t input_tile_bytes = get_tile_size(cb_id_input);
    const auto input_data_format = get_dataformat(cb_id_input);

    const InterleavedAddrGenFast<true> dram_input_addrg = {
        .bank_base_address = input_addr, .page_size = input_tile_bytes, .data_format = input_data_format};

    const InterleavedAddrGenFast<false> l1_input_addrg = {
        .bank_base_address = input_addr, .page_size = input_tile_bytes, .data_format = input_data_format};

    // Scalar constants consumed by the compute kernel.
    Scalar one;
    one.f = 1.0f;
    fill_cb_with_value(cb_id_one, one.u);
    fill_cb_with_value(cb_id_decimal, decimal);

    constexpr uint32_t TILE_W = 32;
    const bool do_mask_w = (origin_w % TILE_W) != 0;
    const auto mask_w = do_mask_w ? (origin_w % TILE_W) : TILE_W;

    if (do_mask_w) {
        generate_mask_w(cb_id_mask_w, mask_w);
    }

    const auto start_tile_idx = tile_offset;

    for (uint32_t row_idx = 0; row_idx < num_rows_per_core; ++row_idx) {
        for (uint32_t col_idx = 0; col_idx < Wt; ++col_idx) {
            const auto tile_idx = start_tile_idx + row_idx * Wt + col_idx;
            cb_reserve_back(cb_id_input, 1);
            // Fetch the write pointer after reserving each page: the CB write
            // pointer advances on every push, so hoisting this out of the loop
            // is only correct for a single-page CB.
            const auto input_l1_write_ptr = get_write_ptr(cb_id_input);
            if (input_is_dram) {
                noc_async_read_tile(tile_idx, dram_input_addrg, input_l1_write_ptr);
            } else {
                noc_async_read_tile(tile_idx, l1_input_addrg, input_l1_write_ptr);
            }
            noc_async_read_barrier();
            cb_push_back(cb_id_input, 1);
        }
    }

}  // void kernel_main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <stdint.h>

#include "dataflow_api.h"

// Writer kernel for moreh_abs_pow: drains output tiles from the output CB and
// writes them to the interleaved output buffer in DRAM or L1.
void kernel_main() {
    int i{0};
    const auto output_addr = get_arg_val<uint32_t>(i++);
    const bool output_is_dram = get_arg_val<uint32_t>(i++) == 1;
    const auto num_rows_per_core = get_arg_val<uint32_t>(i++);
    const auto Wt = get_arg_val<uint32_t>(i++);
    const auto tile_offset = get_arg_val<uint32_t>(i++);

    uint32_t cb_id{16};  // output CBs start at c_out0 (index 16)
    const auto cb_id_output = cb_id++;

    const uint32_t output_tile_bytes = get_tile_size(cb_id_output);
    const auto output_data_format = get_dataformat(cb_id_output);

    const InterleavedAddrGenFast<true> dram_output_addrg = {
        .bank_base_address = output_addr, .page_size = output_tile_bytes, .data_format = output_data_format};

    const InterleavedAddrGenFast<false> l1_output_addrg = {
        .bank_base_address = output_addr, .page_size = output_tile_bytes, .data_format = output_data_format};

    const auto start_tile_idx = tile_offset;

    for (uint32_t row_idx = 0; row_idx < num_rows_per_core; ++row_idx) {
        for (uint32_t col_idx = 0; col_idx < Wt; ++col_idx) {
            const auto tile_idx = start_tile_idx + row_idx * Wt + col_idx;
            cb_wait_front(cb_id_output, 1);
            // Fetch the read pointer after each wait: the CB read pointer
            // advances on every pop, so hoisting this out of the loop is only
            // correct for a single-page CB.
            const auto output_l1_read_addr = get_read_ptr(cb_id_output);
            if (output_is_dram) {
                noc_async_write_tile(tile_idx, dram_output_addrg, output_l1_read_addr);
            } else {
                noc_async_write_tile(tile_idx, l1_output_addrg, output_l1_read_addr);
            }
            noc_async_write_barrier();
            cb_pop_front(cb_id_output, 1);
        }
    }
}  // void kernel_main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include "moreh_abs_pow_device_operation.hpp"

#include "ttnn/operations/moreh/moreh_helper_functions.hpp"
#include "ttnn/tensor/tensor.hpp"

namespace ttnn::operations::moreh::moreh_abs_pow {

// Decomposes exponent p into (|floor(p)|, fractional remainder, sign flag) so
// device kernels can compute x^p as an integer power combined with
// exp(log(x) * decimal), negated when the floored exponent was below zero.
std::tuple<uint32_t, float, bool> get_floored_p_and_decimal_and_p_is_negative(float p) {
    const float whole = std::floor(p);
    const float fractional = p - whole;  // always in [0, 1)
    const bool negative = whole < 0.0f;
    const float magnitude = negative ? -whole : whole;
    return {static_cast<uint32_t>(magnitude), fractional, negative};
}

// Selects the program factory for this operation. Only one factory exists, so
// it is returned unconditionally regardless of attributes or tensor dtypes.
MorehAbsPowOperation::program_factory_t MorehAbsPowOperation::select_program_factory(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    return MorehAbsPowFactory{};
}

// Validates that both the input and the (optional) preallocated output tensor
// are of a supported dtype (BFLOAT16 or INT32) for moreh_abs_pow.
void validate_tensors(
    const MorehAbsPowOperation::operation_attributes_t& operation_attributes,
    const MorehAbsPowOperation::tensor_args_t& tensor_args) {
    check_tensor(tensor_args.input, "moreh_abs_pow", "input", {DataType::BFLOAT16, DataType::INT32});
    check_tensor(tensor_args.output, "moreh_abs_pow", "output", {DataType::BFLOAT16, DataType::INT32});
}

// Cache-miss validation defers entirely to the shared tensor checks.
void MorehAbsPowOperation::validate_on_program_cache_miss(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    validate_tensors(operation_attributes, tensor_args);
}  // extraneous trailing ';' after the definition removed

// Cache-hit validation is identical to the cache-miss path.
void MorehAbsPowOperation::validate_on_program_cache_hit(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    validate_tensors(operation_attributes, tensor_args);
}  // extraneous trailing ';' after the definition removed
// Computes the output tensor spec: reuses the spec of a preallocated output
// when provided; otherwise mirrors the input's shape, dtype, and layout with
// the memory config from the operation attributes.
MorehAbsPowOperation::spec_return_value_t MorehAbsPowOperation::compute_output_specs(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    const auto& preallocated = tensor_args.output;
    if (preallocated.has_value()) {
        return preallocated->get_tensor_spec();
    }
    const auto& input = tensor_args.input;
    TensorLayout layout(input.get_dtype(), PageConfig(input.get_layout()), operation_attributes.memory_config);
    return TensorSpec(input.get_logical_shape(), layout);
}

// Returns the user-supplied output tensor when present; otherwise allocates a
// new device tensor matching compute_output_specs on the input's device.
MorehAbsPowOperation::tensor_return_value_t MorehAbsPowOperation::create_output_tensors(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    if (tensor_args.output.has_value()) {
        log_debug(tt::LogOp, "{}:{} use output tensor", __func__, __LINE__);
        return {tensor_args.output.value()};
    }

    log_debug(tt::LogOp, "{}:{} create output tensor", __func__, __LINE__);
    return create_device_tensor(compute_output_specs(operation_attributes, tensor_args), tensor_args.input.device());
}  // extraneous trailing ';' after the definition removed

// Packs the public-API arguments into the (attributes, tensor_args) pair the
// device-operation framework expects. Defaults: memory config falls back to
// the input's, and the compute kernel config defaults to HiFi4 fidelity.
std::tuple<MorehAbsPowOperation::operation_attributes_t, MorehAbsPowOperation::tensor_args_t>
MorehAbsPowOperation::invoke(
    const Tensor& input,
    const float p,
    const std::optional<Tensor>& output,
    const std::optional<MemoryConfig>& memory_config,
    const std::optional<DeviceComputeKernelConfig>& compute_kernel_config) {
    return {
        operation_attributes_t{
            p,
            memory_config.value_or(input.memory_config()),
            init_device_compute_kernel_config(input.device()->arch(), compute_kernel_config, MathFidelity::HiFi4)},
        tensor_args_t{input, output}};
}
} // namespace ttnn::operations::moreh::moreh_abs_pow
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <variant>

#include "ttnn/decorators.hpp"
#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
#include "ttnn/tensor/types.hpp"

// Declares a program-factory struct for this device operation: cached kernel
// handles and core counts (shared_variables_t), plus the create /
// override_runtime_arguments entry points the device-operation framework
// calls. Comments must stay outside the macro body — a `//` comment inside
// would swallow the backslash line continuations.
#define MOREH_ABS_POW_FACTORY_H(name)                                                     \
    struct name {                                                                         \
        struct shared_variables_t {                                                       \
            KernelHandle reader_kernels_id;                                               \
            KernelHandle writer_kernels_id;                                               \
            std::size_t num_cores_to_be_used;                                             \
            std::size_t num_cores_y;                                                      \
        };                                                                                \
                                                                                          \
        using cached_program_t = ttnn::device_operation::CachedProgram<shared_variables_t>; \
                                                                                          \
        static cached_program_t create(                                                   \
            const operation_attributes_t& operation_attributes,                           \
            const tensor_args_t& tensor_args,                                             \
            tensor_return_value_t& output_tensor);                                        \
                                                                                          \
        static void override_runtime_arguments(                                           \
            cached_program_t& cached_program,                                             \
            const operation_attributes_t& operation_attributes,                           \
            const tensor_args_t& tensor_args,                                             \
            tensor_return_value_t& output_tensor);                                        \
    };

namespace ttnn::operations::moreh::moreh_abs_pow {

std::tuple<uint32_t, float, bool> get_floored_p_and_decimal_and_p_is_negative(float p);

// Device operation computing |input|^p elementwise.
// Conforms to the ttnn device-operation interface: attribute/tensor-arg
// structs, spec/tensor return types, a program factory, and the static
// validate/compute/create/invoke hooks.
struct MorehAbsPowOperation {
    // Cache-key attributes: the exponent plus output placement and kernel config.
    struct operation_attributes_t {
        const float p;

        const MemoryConfig memory_config;
        const DeviceComputeKernelConfig compute_kernel_config;
    };
    // Input tensor plus an optional preallocated output.
    struct tensor_args_t {
        const Tensor& input;
        const std::optional<Tensor>& output;
    };

    using spec_return_value_t = TensorSpec;
    using tensor_return_value_t = Tensor;

    // Expands to the MorehAbsPowFactory program-factory declaration.
    MOREH_ABS_POW_FACTORY_H(MorehAbsPowFactory)

    using program_factory_t = std::variant<MorehAbsPowFactory>;
    static program_factory_t select_program_factory(const operation_attributes_t&, const tensor_args_t&);
    static void validate_on_program_cache_miss(const operation_attributes_t&, const tensor_args_t&);
    static void validate_on_program_cache_hit(const operation_attributes_t&, const tensor_args_t&);
    static spec_return_value_t compute_output_specs(const operation_attributes_t&, const tensor_args_t&);
    static tensor_return_value_t create_output_tensors(const operation_attributes_t&, const tensor_args_t&);
    // Public entry point: packs arguments into (attributes, tensor_args).
    static std::tuple<operation_attributes_t, tensor_args_t> invoke(
        const Tensor& input,
        const float p,
        const std::optional<Tensor>& output,
        const std::optional<MemoryConfig>& memory_config,
        const std::optional<DeviceComputeKernelConfig>& compute_kernel_config);
};

} // namespace ttnn::operations::moreh::moreh_abs_pow

// Registers the primitive so it is callable as ttnn::prim::moreh_abs_pow.
namespace ttnn::prim {
constexpr auto moreh_abs_pow = ttnn::
    register_operation<"ttnn::prim::moreh_abs_pow", ttnn::operations::moreh::moreh_abs_pow::MorehAbsPowOperation>();
}  // namespace ttnn::prim
Loading
Loading