-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Replacing get_shape() #16786
Open
jbedichekTT
wants to merge
21
commits into
main
Choose a base branch
from
replace-get-shape-with-get_logical_shape-and-get_padded_shape
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Replacing get_shape() #16786
Changes from 8 commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
8ccd319
#0: Replacing get_shape()
jbedichekTT e27a663
Fixing use of get_shape()
jbedichekTT cc5774d
Fixing missing function call
jbedichekTT 98323d0
Fixed more shape calls
jbedichekTT 3763ebf
Continuing to fix get_shape() calls
jbedichekTT 831e700
Reverting to get_shape where code needs more general refactoring
jbedichekTT cfdb97f
Fixing bugs in reverting back to get_shape in some instances
jbedichekTT 58f3e06
Still reverting back to get_shape in some instances
jbedichekTT 428e499
Fixing mistakes
jbedichekTT 41c0092
Deleted duplicate Moreh
jbedichekTT 628f2fd
Fixing shapes
jbedichekTT e9493a9
fixing shape calls
jbedichekTT 07d577b
Merged and fixed more shape calls
jbedichekTT 93c57b8
fixed shape error
jbedichekTT 242fd94
Changed function arguments to SimpleShape
jbedichekTT 93c3221
Refactoring variable names
jbedichekTT 03b65d0
Changed shape used for rank
jbedichekTT 6458d2f
Fixing refactoring variable names and rank calls
jbedichekTT 89d0692
Fixed typos and variable names
jbedichekTT d1f4f4f
Saving current changes for testing
jbedichekTT d852339
Fixed shape calls
jbedichekTT File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
There are no files selected for viewing
91 changes: 91 additions & 0 deletions
91
ttnn/cpp/ttnn/operations/moreh/moreh/moreh_abs_pow/device/kernels/moreh_abs_pow_kernel.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
#include "debug/dprint.h" | ||
#include "ttnn/cpp/ttnn/deprecated/tt_dnn/kernels/compute/moreh_common.hpp" | ||
|
||
namespace NAMESPACE { | ||
void MAIN { | ||
int i{0}; | ||
const auto num_rows_per_core = get_arg_val<uint32_t>(i++); | ||
const auto Wt = get_arg_val<uint32_t>(i++); | ||
const auto origin_w = get_arg_val<uint32_t>(i++); | ||
const auto p = get_arg_val<uint32_t>(i++); | ||
const bool p_is_negative = get_arg_val<uint32_t>(i++) == 1; | ||
|
||
std::uint8_t input_id{tt::CB::c_in0}; | ||
const auto cb_x = input_id++; // input | ||
const auto cb_one = input_id++; // one | ||
const auto cb_decimal = input_id++; // decimal | ||
const auto cb_mask_w = input_id++; // mask_w | ||
|
||
std::uint8_t output_id{tt::CB::c_out0}; | ||
const auto cb_y = output_id++; // output | ||
|
||
std::uint8_t intermed_id{tt::CB::c_intermed0}; | ||
const auto cb_tmp0 = intermed_id++; | ||
const auto cb_tmp1 = intermed_id++; | ||
const auto cb_tmp2 = intermed_id++; | ||
const auto cb_tmp3 = intermed_id++; | ||
|
||
const auto cb_xabs = cb_tmp0; // |x| | ||
const auto cb_xpow = cb_tmp1; // |x|^p | ||
const auto cb_logx = cb_tmp2; // log(|x|) | ||
const auto cb_exp_lxmd = cb_tmp3; // exp(log(|x|) * decimal) | ||
|
||
constexpr uint32_t onetile = 1; | ||
constexpr uint32_t dst0 = 0; | ||
constexpr uint32_t dst1 = 1; | ||
|
||
binary_op_init_common(tt::CB::c_in0, tt::CB::c_in0); | ||
|
||
cb_wait_front(cb_one, onetile); // comes from the reader | ||
cb_wait_front(cb_decimal, onetile); // comes from the reader | ||
|
||
constexpr uint32_t TILE_W = 32; | ||
const bool do_mask_w = (origin_w % TILE_W) != 0; | ||
const auto mask_w = do_mask_w ? (origin_w % TILE_W) : TILE_W; | ||
|
||
if (do_mask_w) { | ||
cb_wait_front(cb_mask_w, onetile); // comes from the reader | ||
} | ||
for (uint32_t row_idx = 0; row_idx < num_rows_per_core; ++row_idx) { | ||
for (uint32_t col_idx = 0; col_idx < Wt; ++col_idx) { | ||
// |x| | ||
tile_regs_acquire(); | ||
cb_wait_front(cb_x, onetile); // comes from the reader | ||
cb_reserve_back(cb_xabs, onetile); | ||
|
||
copy_tile_init_with_dt(cb_x); | ||
copy_tile(cb_x, 0, dst0); | ||
|
||
if (do_mask_w && (col_idx == Wt - 1)) { | ||
copy_tile_init_with_dt(cb_mask_w); | ||
copy_tile(cb_mask_w, 0, dst1); | ||
|
||
mask_tile_init(); | ||
mask_tile(dst0, dst1); | ||
} | ||
|
||
abs_tile_init(); | ||
abs_tile(dst0); | ||
tile_regs_commit(); | ||
|
||
tile_regs_wait(); | ||
pack_tile_with_dt(dst0, cb_xabs); | ||
tile_regs_release(); | ||
|
||
cb_pop_front(cb_x, onetile); | ||
cb_push_back(cb_xabs, onetile); | ||
|
||
power_tile_to_cb(cb_xabs, cb_xpow, cb_logx, cb_decimal, cb_exp_lxmd, cb_y, p, p_is_negative); | ||
} | ||
} | ||
|
||
cb_pop_front(cb_one, onetile); | ||
cb_pop_front(cb_decimal, onetile); | ||
if (do_mask_w) { | ||
cb_pop_front(cb_mask_w, onetile); | ||
} | ||
} // void MAIN | ||
} // namespace NAMESPACE |
62 changes: 62 additions & 0 deletions
62
ttnn/cpp/ttnn/operations/moreh/moreh/moreh_abs_pow/device/kernels/reader_moreh_abs_pow.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "ttnn/cpp/ttnn/deprecated/tt_dnn/kernels/dataflow/moreh_common.hpp" | ||
|
||
void kernel_main() { | ||
int i{0}; | ||
const auto input_addr = get_arg_val<uint32_t>(i++); | ||
const bool input_is_dram = get_arg_val<uint32_t>(i++) == 1; | ||
const auto decimal = get_arg_val<uint32_t>(i++); | ||
const auto num_rows_per_core = get_arg_val<uint32_t>(i++); | ||
const auto Wt = get_arg_val<uint32_t>(i++); | ||
const auto tile_offset = get_arg_val<uint32_t>(i++); | ||
const auto origin_w = get_arg_val<uint32_t>(i++); | ||
|
||
uint32_t cb_id{0}; | ||
const auto cb_id_input = cb_id++; | ||
const auto cb_id_one = cb_id++; | ||
const auto cb_id_decimal = cb_id++; | ||
const auto cb_id_mask_w = cb_id++; | ||
|
||
const uint32_t input_tile_bytes = get_tile_size(cb_id_input); | ||
const auto input_data_format = get_dataformat(cb_id_input); | ||
|
||
const InterleavedAddrGenFast<true> dram_input_addrg = { | ||
.bank_base_address = input_addr, .page_size = input_tile_bytes, .data_format = input_data_format}; | ||
|
||
const InterleavedAddrGenFast<false> l1_input_addrg = { | ||
.bank_base_address = input_addr, .page_size = input_tile_bytes, .data_format = input_data_format}; | ||
|
||
Scalar one; | ||
one.f = 1.0f; | ||
fill_cb_with_value(cb_id_one, one.u); | ||
fill_cb_with_value(cb_id_decimal, decimal); | ||
|
||
constexpr uint32_t TILE_W = 32; | ||
const bool do_mask_w = (origin_w % TILE_W) != 0; | ||
const auto mask_w = do_mask_w ? (origin_w % TILE_W) : TILE_W; | ||
|
||
if (do_mask_w) { | ||
generate_mask_w(cb_id_mask_w, mask_w); | ||
} | ||
|
||
const auto start_tile_idx = tile_offset; | ||
const auto input_l1_write_ptr = get_write_ptr(cb_id_input); | ||
|
||
for (uint32_t row_idx = 0; row_idx < num_rows_per_core; ++row_idx) { | ||
for (uint32_t col_idx = 0; col_idx < Wt; ++col_idx) { | ||
const auto tile_idx = start_tile_idx + row_idx * Wt + col_idx; | ||
cb_reserve_back(cb_id_input, 1); | ||
if (input_is_dram) { | ||
noc_async_read_tile(tile_idx, dram_input_addrg, input_l1_write_ptr); | ||
} else { | ||
noc_async_read_tile(tile_idx, l1_input_addrg, input_l1_write_ptr); | ||
} | ||
noc_async_read_barrier(); | ||
cb_push_back(cb_id_input, 1); | ||
} | ||
} | ||
|
||
} // void kernel_main() |
45 changes: 45 additions & 0 deletions
45
ttnn/cpp/ttnn/operations/moreh/moreh/moreh_abs_pow/device/kernels/writer_moreh_abs_pow.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <stdint.h> | ||
|
||
#include "dataflow_api.h" | ||
|
||
void kernel_main() { | ||
int i{0}; | ||
const auto output_addr = get_arg_val<uint32_t>(i++); | ||
const bool output_is_dram = get_arg_val<uint32_t>(i++) == 1; | ||
const auto num_rows_per_core = get_arg_val<uint32_t>(i++); | ||
const auto Wt = get_arg_val<uint32_t>(i++); | ||
const auto tile_offset = get_arg_val<uint32_t>(i++); | ||
|
||
uint32_t cb_id{16}; | ||
const auto cb_id_output = cb_id++; | ||
|
||
const uint32_t output_tile_bytes = get_tile_size(cb_id_output); | ||
const auto output_data_format = get_dataformat(cb_id_output); | ||
|
||
const InterleavedAddrGenFast<true> dram_output_addrg = { | ||
.bank_base_address = output_addr, .page_size = output_tile_bytes, .data_format = output_data_format}; | ||
|
||
const InterleavedAddrGenFast<false> l1_output_addrg = { | ||
.bank_base_address = output_addr, .page_size = output_tile_bytes, .data_format = output_data_format}; | ||
|
||
const auto start_tile_idx = tile_offset; | ||
const auto output_l1_read_addr = get_read_ptr(cb_id_output); | ||
|
||
for (uint32_t row_idx = 0; row_idx < num_rows_per_core; ++row_idx) { | ||
for (uint32_t col_idx = 0; col_idx < Wt; ++col_idx) { | ||
const auto tile_idx = start_tile_idx + row_idx * Wt + col_idx; | ||
cb_wait_front(cb_id_output, 1); | ||
if (output_is_dram) { | ||
noc_async_write_tile(tile_idx, dram_output_addrg, output_l1_read_addr); | ||
} else { | ||
noc_async_write_tile(tile_idx, l1_output_addrg, output_l1_read_addr); | ||
} | ||
noc_async_write_barrier(); | ||
cb_pop_front(cb_id_output, 1); | ||
} | ||
} | ||
} // void kernel_main() |
84 changes: 84 additions & 0 deletions
84
ttnn/cpp/ttnn/operations/moreh/moreh/moreh_abs_pow/device/moreh_abs_pow_device_operation.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include "moreh_abs_pow_device_operation.hpp" | ||
|
||
#include "ttnn/operations/moreh/moreh_helper_functions.hpp" | ||
#include "ttnn/tensor/tensor.hpp" | ||
|
||
namespace ttnn::operations::moreh::moreh_abs_pow { | ||
|
||
// Splits exponent p into (|floor(p)| as uint32, fractional part, sign flag).
// Examples: p = 2.5 -> (2, 0.5, false); p = -2.5 -> (3, 0.5, true) since
// floor(-2.5) == -3 and the decimal part is always non-negative.
std::tuple<uint32_t, float, bool> get_floored_p_and_decimal_and_p_is_negative(float p) {
    const float floored = std::floor(p);
    const float decimal = p - floored;  // in [0, 1)
    const bool negative = floored < 0.0f;
    const float magnitude = negative ? -floored : floored;
    return {static_cast<uint32_t>(magnitude), decimal, negative};
}
|
||
// Selects the program factory for this op. Only one factory exists, so it is
// returned unconditionally regardless of attributes or tensor arguments.
MorehAbsPowOperation::program_factory_t MorehAbsPowOperation::select_program_factory(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    // Case for int32
    return MorehAbsPowFactory{};
}
|
||
// Shared validation used by both the cache-hit and cache-miss paths: checks
// that the input tensor and the optional preallocated output tensor have one
// of the supported dtypes (BFLOAT16 or INT32).
void validate_tensors(
    const MorehAbsPowOperation::operation_attributes_t& operation_attributes,
    const MorehAbsPowOperation::tensor_args_t& tensor_args) {
    const auto& input = tensor_args.input;
    auto& output = tensor_args.output;

    check_tensor(input, "moreh_abs_pow", "input", {DataType::BFLOAT16, DataType::INT32});
    check_tensor(output, "moreh_abs_pow", "output", {DataType::BFLOAT16, DataType::INT32});
}
|
||
void MorehAbsPowOperation::validate_on_program_cache_miss( | ||
const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) { | ||
validate_tensors(operation_attributes, tensor_args); | ||
}; | ||
|
||
void MorehAbsPowOperation::validate_on_program_cache_hit( | ||
const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) { | ||
validate_tensors(operation_attributes, tensor_args); | ||
}; | ||
// Computes the output TensorSpec: reuses the spec of a preallocated output
// tensor when one was provided, otherwise derives the spec from the input's
// logical shape, dtype and layout combined with the requested memory config.
MorehAbsPowOperation::spec_return_value_t MorehAbsPowOperation::compute_output_specs(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    if (tensor_args.output.has_value()) {
        return tensor_args.output->get_tensor_spec();
    }
    const auto& input = tensor_args.input;
    return TensorSpec(
        input.get_logical_shape(),
        TensorLayout(input.get_dtype(), PageConfig(input.get_layout()), operation_attributes.memory_config));
}
|
||
// Returns the user-provided output tensor when present; otherwise allocates a
// fresh device tensor matching compute_output_specs on the input's device.
MorehAbsPowOperation::tensor_return_value_t MorehAbsPowOperation::create_output_tensors(
    const operation_attributes_t& operation_attributes, const tensor_args_t& tensor_args) {
    if (tensor_args.output.has_value()) {
        log_debug(tt::LogOp, "{}:{} use output tensor", __func__, __LINE__);
        return {tensor_args.output.value()};
    }

    log_debug(tt::LogOp, "{}:{} create output tensor", __func__, __LINE__);
    return create_device_tensor(compute_output_specs(operation_attributes, tensor_args), tensor_args.input.device());
}
|
||
// Packs the user-facing arguments into the (attributes, tensor_args) pair
// consumed by the device-operation framework. Falls back to the input's
// memory config when none is given, and builds a HiFi4 compute-kernel config
// for the input tensor's device architecture.
std::tuple<MorehAbsPowOperation::operation_attributes_t, MorehAbsPowOperation::tensor_args_t>
MorehAbsPowOperation::invoke(
    const Tensor& input,
    const float p,
    const std::optional<Tensor>& output,
    const std::optional<MemoryConfig>& memory_config,
    const std::optional<DeviceComputeKernelConfig>& compute_kernel_config) {
    const operation_attributes_t operation_attributes{
        p,
        memory_config.value_or(input.memory_config()),
        init_device_compute_kernel_config(input.device()->arch(), compute_kernel_config, MathFidelity::HiFi4)};
    const tensor_args_t tensor_args{input, output};

    return {operation_attributes, tensor_args};
}
} // namespace ttnn::operations::moreh::moreh_abs_pow |
76 changes: 76 additions & 0 deletions
76
ttnn/cpp/ttnn/operations/moreh/moreh/moreh_abs_pow/device/moreh_abs_pow_device_operation.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include <variant> | ||
|
||
#include "ttnn/decorators.hpp" | ||
#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp" | ||
#include "ttnn/tensor/types.hpp" | ||
|
||
// Declares a program-factory struct `name` for the moreh_abs_pow device
// operation: shared_variables_t holds the kernel handles and core counts
// cached between runs, `create` builds the program, and
// `override_runtime_arguments` patches runtime args on a program-cache hit.
// NOTE: comments cannot appear inside the macro body because a `//` comment
// would swallow the line-continuation backslash that follows it.
#define MOREH_ABS_POW_FACTORY_H(name) \
    struct name { \
        struct shared_variables_t { \
            KernelHandle reader_kernels_id; \
            KernelHandle writer_kernels_id; \
            std::size_t num_cores_to_be_used; \
            std::size_t num_cores_y; \
        }; \
 \
        using cached_program_t = ttnn::device_operation::CachedProgram<shared_variables_t>; \
 \
        static cached_program_t create( \
            const operation_attributes_t& operation_attributes, \
            const tensor_args_t& tensor_args, \
            tensor_return_value_t& output_tensor); \
 \
        static void override_runtime_arguments( \
            cached_program_t& cached_program, \
            const operation_attributes_t& operation_attributes, \
            const tensor_args_t& tensor_args, \
            tensor_return_value_t& output_tensor); \
    };
|
||
namespace ttnn::operations::moreh::moreh_abs_pow { | ||
|
||
std::tuple<uint32_t, float, bool> get_floored_p_and_decimal_and_p_is_negative(float p); | ||
|
||
// Device operation computing |input|^p element-wise (see the abs + power
// compute kernel). Follows the ttnn device-operation interface: program
// selection, validation, output-spec computation, tensor creation, invoke.
struct MorehAbsPowOperation {
    // Attributes that parameterize the program (captured per invocation).
    struct operation_attributes_t {
        const float p;  // exponent; split into integer/decimal parts for the kernels

        const MemoryConfig memory_config;
        const DeviceComputeKernelConfig compute_kernel_config;
    };
    // Input tensor plus an optional preallocated output tensor.
    struct tensor_args_t {
        const Tensor& input;
        const std::optional<Tensor>& output;
    };

    using spec_return_value_t = TensorSpec;
    using tensor_return_value_t = Tensor;

    // Expands to the MorehAbsPowFactory program-factory struct declaration.
    MOREH_ABS_POW_FACTORY_H(MorehAbsPowFactory)

    using program_factory_t = std::variant<MorehAbsPowFactory>;
    static program_factory_t select_program_factory(const operation_attributes_t&, const tensor_args_t&);
    static void validate_on_program_cache_miss(const operation_attributes_t&, const tensor_args_t&);
    static void validate_on_program_cache_hit(const operation_attributes_t&, const tensor_args_t&);
    static spec_return_value_t compute_output_specs(const operation_attributes_t&, const tensor_args_t&);
    static tensor_return_value_t create_output_tensors(const operation_attributes_t&, const tensor_args_t&);
    // Packs user-facing arguments into (operation_attributes_t, tensor_args_t).
    static std::tuple<operation_attributes_t, tensor_args_t> invoke(
        const Tensor& input,
        const float p,
        const std::optional<Tensor>& output,
        const std::optional<MemoryConfig>& memory_config,
        const std::optional<DeviceComputeKernelConfig>& compute_kernel_config);
};
|
||
} // namespace ttnn::operations::moreh::moreh_abs_pow | ||
|
||
namespace ttnn::prim {
// Registers the primitive so it is invocable as ttnn::prim::moreh_abs_pow.
constexpr auto moreh_abs_pow = ttnn::
    register_operation<"ttnn::prim::moreh_abs_pow", ttnn::operations::moreh::moreh_abs_pow::MorehAbsPowOperation>();
}  // namespace ttnn::prim
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this file changed?