Skip to content

Commit

Permalink
#0: Fixes from commit ae61802 (#16686)
Browse files Browse the repository at this point in the history
Add back the deleted cq_id arg inside copy_host_to_device_tensor. Update
remote cb sync tests to use set_sub_device_stall_group.

YOLO-merging to fix perf!

### Ticket
Bug and un-updated tests left over from the original PR that removed
sub_device_ids arg propagation.

### Problem description
Add back the mistakenly deleted arg and update the microbenchmark tests to use
the new API.

### What's changed
Describe the approach used to solve the problem.
Summarize the changes made and their impact.

### Checklist
- [ ] Post commit CI passes
https://github.com/tenstorrent/tt-metal/actions/runs/12755760197
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] **(For models and ops writers)** Full [new
models](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml)
tests pass
- [x] New/Existing tests provide coverage for changes

Single Card Perf:
https://github.com/tenstorrent/tt-metal/actions/runs/12755761880
T3K Perf:
https://github.com/tenstorrent/tt-metal/actions/runs/12755764705
TG Perf:
https://github.com/tenstorrent/tt-metal/actions/runs/12755767965
Perf Microbenchmark:
https://github.com/tenstorrent/tt-metal/actions/runs/12755770802
  • Loading branch information
tt-aho authored Jan 13, 2025
1 parent ea6d5c0 commit d5dcbbe
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -723,11 +723,13 @@ int main(int argc, char** argv) {
std::vector<std::pair<CoreCoord, CoreRangeSet>> sender_receiver_core_mapping = {
{ dram_reader_core_coord, l1_receiver_core }
};
std::vector<SubDeviceId> receiver_sub_device_ids = {};
if (use_sub_devices) {
SubDevice sender_sub_device = SubDevice(std::array{dram_reader_core});
SubDevice receiver_sub_device = SubDevice(std::array{l1_receiver_core});
SubDeviceManagerId sdm_id = device->create_sub_device_manager({sender_sub_device, receiver_sub_device}, 0);
device->load_sub_device_manager(sdm_id);
receiver_sub_device_ids.push_back(SubDeviceId{1});
}
////////////////////////////////////////////////////////////////////////////
// Input Setup
Expand Down Expand Up @@ -872,8 +874,18 @@ int main(int argc, char** argv) {

log_info(LogTest, "Num tests {}", num_tests);
for (uint32_t i = 0; i < num_tests; ++i) {
for (auto& program : programs) {
EnqueueProgram(device->command_queue(), program, false);
if (use_sub_devices) {
// Enqueue the sender program
EnqueueProgram(device->command_queue(), programs[0], false);
device->set_sub_device_stall_group(receiver_sub_device_ids);
for (uint32_t j = 1; j < programs.size(); ++j) {
EnqueueProgram(device->command_queue(), programs[j], false);
}
device->reset_sub_device_stall_group();
} else {
for (auto& program : programs) {
EnqueueProgram(device->command_queue(), program, false);
}
}
Finish(device->command_queue());
for (auto& program : programs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -700,11 +700,13 @@ int main(int argc, char** argv) {
}
CoreRangeSet l1_receiver_core{std::set<CoreRange>{l1_receiver_core_coord_range}};
std::vector<std::pair<CoreCoord, CoreRangeSet>> sender_receiver_core_mapping = { { dram_reader_core_coord, l1_receiver_core } };
std::vector<SubDeviceId> receiver_sub_device_ids = {};
if (use_sub_devices) {
SubDevice sender_sub_device = SubDevice(std::array{dram_reader_core});
SubDevice receiver_sub_device = SubDevice(std::array{l1_receiver_core});
SubDeviceManagerId sdm_id = device->create_sub_device_manager({sender_sub_device, receiver_sub_device}, 0);
device->load_sub_device_manager(sdm_id);
receiver_sub_device_ids.push_back(SubDeviceId{1});
}

////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -863,8 +865,18 @@ int main(int argc, char** argv) {

log_info(LogTest, "Num tests {}", num_tests);
for (uint32_t i = 0; i < num_tests; ++i) {
for (auto& program : programs) {
EnqueueProgram(device->command_queue(), program, false);
if (use_sub_devices) {
// Enqueue the sender program
EnqueueProgram(device->command_queue(), programs[0], false);
device->set_sub_device_stall_group(receiver_sub_device_ids);
for (uint32_t j = 1; j < programs.size(); ++j) {
EnqueueProgram(device->command_queue(), programs[j], false);
}
device->reset_sub_device_stall_group();
} else {
for (auto& program : programs) {
EnqueueProgram(device->command_queue(), program, false);
}
}
Finish(device->command_queue());
for (auto& program : programs) {
Expand Down
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/operations/core/core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ ttnn::Tensor allocate_tensor_on_device(
}

void copy_host_to_device_tensor(const ttnn::Tensor& host_tensor, ttnn::Tensor device_tensor, uint8_t cq_id) {
tt::tt_metal::write_tensor(std::move(host_tensor), std::move(device_tensor));
tt::tt_metal::write_tensor(std::move(host_tensor), std::move(device_tensor), cq_id);
}

ttnn::Tensor from_device(const ttnn::Tensor& tensor, bool blocking, uint8_t cq_id) {
Expand Down

0 comments on commit d5dcbbe

Please sign in to comment.