Skip to content

Commit

Permalink
[UMD] Switching to new coord API (#17003)
Browse files Browse the repository at this point in the history
### Ticket
Related to #17002

### Problem description
This is one of many changes moving tt-metal toward the new CoreCoord API,
both in soc_descriptor and in UMD's interface.
Because of the complexity of the coordinate maps involved, the changes in
this PR are kept relatively small.

### What's changed
- virtual_core_from_physical_core and
get_virtual_coordinate_from_physical_coordinates rewritten to use
translate_coord_to instead of convert_to_umd_coordinates and
translate_to_noc_table_coords
- Removed the worker_log_to_physical_routing_{x,y} and
physical_routing_to_virtual_routing_{x,y} maps from metal_SocDescriptor
- get_physical_tensix_core_from_logical and convert_to_umd_coordinates
rewritten to use translate_coord_to
- Replaced some calls to get_physical_core_from_logical_core with
get_physical_tensix_core_from_logical
- Changed the Grayskull soc descriptor to be row-wise instead of
column-wise. This matches how it is written in UMD and is consistent with
the other soc descriptors.
- Bumped UMD to pull in recent upstream changes

### Checklist
All runs on brosko/soc_new_api :
- [x] All post-commit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986193862
- [x] Blackhole post-commit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986195849
- [x] (Single-card) Model perf tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986198348
- [x] (Single-card) Device perf regressions :
https://github.com/tenstorrent/tt-metal/actions/runs/12986201144
- [x] (T3K) T3000 unit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986203715
- [x] (T3K) T3000 demo tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986205723
- [x] (TG) TG unit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986207280
- [x] (TG) TG demo tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986209173
- [x] (TGG) TGG unit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986211545
- [x] (TGG) TGG demo tests :
https://github.com/tenstorrent/tt-metal/actions/runs/12986213812
  • Loading branch information
broskoTT authored and williamlyTT committed Jan 30, 2025
1 parent 7deb7b2 commit 3001628
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 95 deletions.
2 changes: 1 addition & 1 deletion tt_metal/api/tt-metalium/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ class Device : public IDevice {
CoreCoord physical_worker_core_from_logical_core(const CoreCoord &logical_core) const;
CoreCoord dram_core_from_dram_channel(uint32_t dram_channel) const;
CoreType core_type_from_physical_core(const CoreCoord &physical_core) const;
CoreCoord virtual_core_from_physical_core(const CoreCoord &physical_coord, const CoreType& core_type) const;
CoreCoord virtual_core_from_physical_core(const CoreCoord& physical_coord) const;

chip_id_t id_;
uint32_t build_key_ = 0;
Expand Down
6 changes: 0 additions & 6 deletions tt_metal/api/tt-metalium/metal_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,6 @@ struct metal_SocDescriptor : public tt_SocDescriptor {
std::vector<tt_xy_pair> physical_harvested_workers;
std::vector<tt_xy_pair> physical_ethernet_cores;

std::unordered_map<int, int> worker_log_to_physical_routing_x;
std::unordered_map<int, int> worker_log_to_physical_routing_y;
// Physical to virtual maps are only applicable for x and y of tensix workers
std::unordered_map<int, int> physical_routing_to_virtual_routing_x;
std::unordered_map<int, int> physical_routing_to_virtual_routing_y;

std::map<CoreCoord, int> logical_eth_core_to_chan_map;
std::map<int, CoreCoord> chan_to_logical_eth_core_map;

Expand Down
2 changes: 1 addition & 1 deletion tt_metal/api/tt-metalium/tt_cluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class Cluster {

const metal_SocDescriptor &get_soc_desc(chip_id_t chip) const;
CoreCoord get_virtual_coordinate_from_logical_coordinates(chip_id_t chip_id, CoreCoord logical_coord, const CoreType& core_type) const;
CoreCoord get_virtual_coordinate_from_physical_coordinates(chip_id_t chip_id, CoreCoord physical_coord, const CoreType& core_type) const;
CoreCoord get_virtual_coordinate_from_physical_coordinates(chip_id_t chip_id, CoreCoord physical_coord) const;
tt_cxy_pair get_virtual_coordinate_from_logical_coordinates(tt_cxy_pair logical_coordinate, const CoreType& core_type) const;
const std::unordered_set<CoreCoord>& get_virtual_worker_cores(chip_id_t chip_id) const;
const std::unordered_set<CoreCoord>& get_virtual_eth_cores(chip_id_t chip_id) const;
Expand Down
8 changes: 4 additions & 4 deletions tt_metal/common/core_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,10 @@ const std::tuple<uint32_t, CoreRange>& get_physical_worker_grid_config(
uint32_t tensix_num_worker_cores = tensix_num_worker_cols * tensix_num_worker_rows;
const metal_SocDescriptor& soc_desc = tt::Cluster::instance().get_soc_desc(device_id);
// Get physical compute grid range based on SOC Desc and Logical Coords
CoreCoord tensix_worker_start_phys = soc_desc.get_physical_core_from_logical_core(
CoreCoord(0, 0), CoreType::WORKER); // Logical Worker Coords start at 0,0
CoreCoord tensix_worker_end_phys = soc_desc.get_physical_core_from_logical_core(
CoreCoord(tensix_num_worker_cols - 1, tensix_num_worker_rows - 1), CoreType::WORKER);
// Logical Worker Coords start at 0,0
CoreCoord tensix_worker_start_phys = soc_desc.get_physical_tensix_core_from_logical(CoreCoord(0, 0));
CoreCoord tensix_worker_end_phys = soc_desc.get_physical_tensix_core_from_logical(
CoreCoord(tensix_num_worker_cols - 1, tensix_num_worker_rows - 1));
CoreRange tensix_worker_physical_grid = CoreRange(tensix_worker_start_phys, tensix_worker_end_phys);
physical_grid_config_cache.insert(
{config_hash, std::make_tuple(tensix_num_worker_cores, tensix_worker_physical_grid)});
Expand Down
52 changes: 13 additions & 39 deletions tt_metal/common/metal_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,9 @@ CoreCoord metal_SocDescriptor::get_logical_ethernet_core_from_physical(const Cor
}

CoreCoord metal_SocDescriptor::get_physical_tensix_core_from_logical(const CoreCoord& logical_coord) const {
TT_FATAL(
(logical_coord.x < this->worker_grid_size.x) and (logical_coord.y < this->worker_grid_size.y),
"Bounds-Error -- Logical_core={} is outside of logical_grid_size={}",
logical_coord.str(),
this->worker_grid_size.str());
CoreCoord physical_tensix_core({
static_cast<size_t>(this->worker_log_to_physical_routing_x.at(logical_coord.x)),
static_cast<size_t>(this->worker_log_to_physical_routing_y.at(logical_coord.y)),
});
return physical_tensix_core;
tt::umd::CoreCoord physical_coord =
translate_coord_to({logical_coord, CoreType::TENSIX, CoordSystem::LOGICAL}, CoordSystem::PHYSICAL);
return {physical_coord.x, physical_coord.y};
}

CoreCoord metal_SocDescriptor::get_physical_dram_core_from_logical(const CoreCoord& logical_coord) const {
Expand Down Expand Up @@ -205,33 +198,20 @@ void metal_SocDescriptor::load_dram_metadata_from_device_descriptor() {

// UMD expects virtual NOC coordinates for worker cores
tt_cxy_pair metal_SocDescriptor::convert_to_umd_coordinates(const tt_cxy_pair& physical_cxy) const {
CoreCoord physical_coord({physical_cxy.x, physical_cxy.y});
const CoreDescriptor& core_desc = this->physical_cores.at(physical_coord);
CoreCoord virtual_coord = physical_coord;
if (core_desc.type == CoreType::WORKER or core_desc.type == CoreType::HARVESTED) {
virtual_coord.x = static_cast<size_t>(this->physical_routing_to_virtual_routing_x.at(physical_cxy.x));
virtual_coord.y = static_cast<size_t>(this->physical_routing_to_virtual_routing_y.at(physical_cxy.y));
}
return tt_cxy_pair(physical_cxy.chip, virtual_coord);
CoordSystem target_system = (this->arch == tt::ARCH::GRAYSKULL) ? CoordSystem::PHYSICAL : CoordSystem::VIRTUAL;
tt::umd::CoreCoord virtual_coord =
translate_coord_to((tt_xy_pair)physical_cxy, CoordSystem::PHYSICAL, target_system);
return tt_cxy_pair(physical_cxy.chip, virtual_coord.x, virtual_coord.y);
}

void metal_SocDescriptor::generate_physical_descriptors_from_virtual(uint32_t harvesting_mask) {
// No need to remap virtual descriptors to physical because Grayskull does not have translation tables enabled,
// meaning UMD removes the physical harvested rows rather than using virtual coordinates
if (harvesting_mask == 0 or (this->arch == tt::ARCH::GRAYSKULL)) {
this->worker_log_to_physical_routing_x = this->worker_log_to_routing_x;
this->worker_log_to_physical_routing_y = this->worker_log_to_routing_y;
this->physical_cores = this->cores;
this->physical_workers = this->workers;
this->physical_harvested_workers = this->harvested_workers;

for (const auto& [virtual_noc_core, core_desc] : this->cores) {
if (core_desc.type == CoreType::WORKER or core_desc.type == CoreType::HARVESTED) {
this->physical_routing_to_virtual_routing_x.insert({virtual_noc_core.x, virtual_noc_core.x});
this->physical_routing_to_virtual_routing_y.insert({virtual_noc_core.y, virtual_noc_core.y});
}
}

return;
}

Expand Down Expand Up @@ -264,10 +244,8 @@ void metal_SocDescriptor::generate_physical_descriptors_from_virtual(uint32_t ha
for (const auto& [virtual_noc_core, core_desc] : this->cores) {
if (core_desc.type == CoreType::WORKER or core_desc.type == CoreType::HARVESTED) {
virtual_y_coords.insert(virtual_noc_core.y);
this->physical_routing_to_virtual_routing_x.insert({virtual_noc_core.x, virtual_noc_core.x});
}
}
this->worker_log_to_physical_routing_x = this->worker_log_to_routing_x;

std::unordered_map<int, int> virtual_routing_to_physical_routing_y;
auto virtual_y_coord_it = virtual_y_coords.begin();
Expand All @@ -278,10 +256,12 @@ void metal_SocDescriptor::generate_physical_descriptors_from_virtual(uint32_t ha
}
int physical_y_coord = *virtual_y_coord_it;
virtual_y_coord_it++;
this->worker_log_to_physical_routing_y.insert({logical_y_coord, physical_y_coord});
int virtual_y_coord = this->worker_log_to_routing_y.at(logical_y_coord);
this->physical_routing_to_virtual_routing_y.insert({physical_y_coord, virtual_y_coord});
virtual_routing_to_physical_routing_y.insert({virtual_y_coord, physical_y_coord});
// This branch will never be executed for Grayskull, but for completeness keeping it in here.
// This will go away in the next PR anyway.
CoordSystem target_system = (this->arch == tt::ARCH::GRAYSKULL) ? CoordSystem::PHYSICAL : CoordSystem::VIRTUAL;
tt::umd::CoreCoord virtual_coord =
translate_coord_to({0, logical_y_coord, CoreType::TENSIX, CoordSystem::LOGICAL}, target_system);
virtual_routing_to_physical_routing_y.insert({virtual_coord.y, physical_y_coord});
}

// map physical harvested rows to virtual harvested rows
Expand All @@ -290,7 +270,6 @@ void metal_SocDescriptor::generate_physical_descriptors_from_virtual(uint32_t ha
v_it != virtual_harvested_rows.end() and p_it != row_coordinates_to_remove.end();
++v_it, ++p_it) {
virtual_routing_to_physical_routing_y.insert({*v_it, *p_it});
this->physical_routing_to_virtual_routing_y.insert({*p_it, *v_it});
}

for (const auto& [virtual_noc_core, core_desc] : this->cores) {
Expand All @@ -309,11 +288,6 @@ void metal_SocDescriptor::generate_physical_descriptors_from_virtual(uint32_t ha
}
this->physical_cores.insert({physical_noc_core, phys_core_desc});
}

TT_ASSERT(
this->physical_routing_to_virtual_routing_y.size() ==
this->worker_grid_size.y + row_coordinates_to_remove.size() and
this->physical_routing_to_virtual_routing_x.size() == this->worker_grid_size.x);
}

void metal_SocDescriptor::generate_logical_eth_coords_mapping() {
Expand Down
24 changes: 13 additions & 11 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@ std::unique_ptr<Allocator> Device::initialize_allocator(size_t l1_small_size, si
}
// Initialize core_type_from_noc_coord_table table
for (const auto& core: soc_desc.physical_cores) {
config.core_type_from_noc_coord_table.insert({this->virtual_core_from_physical_core(core.first, core.second.type), AllocCoreType::Invalid});
config.core_type_from_noc_coord_table.insert(
{this->virtual_core_from_physical_core(core.first), AllocCoreType::Invalid});
}

for (const CoreCoord& core : tt::get_logical_compute_cores(id_, num_hw_cqs_, dispatch_core_config)) {
Expand Down Expand Up @@ -720,7 +721,7 @@ void Device::initialize_and_launch_firmware() {
// Track Virtual Non Worker Cores (In this case only Eth) separately
uint32_t virtual_non_worker_cores_idx = 0;
for (const CoreCoord &core : eth_cores) {
auto virtual_core = this->virtual_core_from_physical_core(core, CoreType::ETH);
auto virtual_core = this->virtual_core_from_physical_core(core);
core_info->virtual_non_worker_cores[virtual_non_worker_cores_idx++] = {virtual_core.x, virtual_core.y, AddressableCoreType::ETH};
}
}
Expand Down Expand Up @@ -1173,11 +1174,12 @@ CoreCoord Device::compute_with_storage_grid_size() const {
}

CoreType Device::core_type_from_physical_core(const CoreCoord &physical_coord) const {
const metal_SocDescriptor &soc_desc = tt::Cluster::instance().get_soc_desc(this->id_);
if (soc_desc.physical_cores.find(physical_coord) == soc_desc.physical_cores.end())
TT_THROW("Physical core {} doesn't exist in metal_SocDescriptor.", physical_coord);

return soc_desc.physical_cores.at(physical_coord).type;
const metal_SocDescriptor& soc_desc = tt::Cluster::instance().get_soc_desc(this->id_);
CoreType core_type = soc_desc.translate_coord_to(physical_coord, CoordSystem::PHYSICAL, CoordSystem::PHYSICAL).core_type;
if (core_type == CoreType::TENSIX) {
core_type = CoreType::WORKER;
}
return core_type;
}

CoreType Device::core_type_from_virtual_core(const CoreCoord &virtual_coord) const {
Expand All @@ -1196,7 +1198,7 @@ CoreCoord Device::virtual_noc0_coordinate(uint8_t noc_index, CoreCoord coord) co
} else {
const auto& grid_size = this->grid_size();
// Coordinate in Physical NOC0 Space. Convert to Virtual.
coord = this->virtual_core_from_physical_core(coord, this->core_type_from_physical_core(coord));
coord = this->virtual_core_from_physical_core(coord);
// Derive virtual coord in noc_index space.
CoreCoord virtual_coord = {
hal.noc_coordinate(noc_index, grid_size.x, coord.x),
Expand All @@ -1219,7 +1221,7 @@ CoreCoord Device::virtual_noc_coordinate(uint8_t noc_index, CoreCoord coord) con
hal.noc_coordinate(noc_index, grid_size.x, coord.x),
hal.noc_coordinate(noc_index, grid_size.y, coord.y)
};
return this->virtual_core_from_physical_core(physical_coord, this->core_type_from_physical_core(physical_coord));
return this->virtual_core_from_physical_core(physical_coord);
}
}

Expand Down Expand Up @@ -1248,8 +1250,8 @@ CoreCoord Device::virtual_core_from_logical_core(const CoreCoord &logical_coord,
return tt::Cluster::instance().get_virtual_coordinate_from_logical_coordinates(this->id_, logical_coord, core_type);
}

CoreCoord Device::virtual_core_from_physical_core(const CoreCoord &physical_coord, const CoreType& core_type) const {
return tt::Cluster::instance().get_virtual_coordinate_from_physical_coordinates(this->id_, physical_coord, core_type);
CoreCoord Device::virtual_core_from_physical_core(const CoreCoord& physical_coord) const {
return tt::Cluster::instance().get_virtual_coordinate_from_physical_coordinates(this->id_, physical_coord);
}

CoreCoord Device::worker_core_from_logical_core(const CoreCoord &logical_core) const {
Expand Down
36 changes: 16 additions & 20 deletions tt_metal/llrt/tt_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,8 @@ void Cluster::generate_virtual_to_umd_coord_mapping() {
this->virtual_eth_cores_[chip_id] = {};
for (auto& core_desc : this->get_soc_desc(chip_id).physical_cores) {
if (core_desc.second.type != CoreType::HARVESTED) {
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(chip_id, core_desc.first, core_desc.second.type);
CoreCoord virtual_coords =
this->get_virtual_coordinate_from_physical_coordinates(chip_id, core_desc.first);
tt_cxy_pair virtual_core = tt_cxy_pair(chip_id, virtual_coords.x, virtual_coords.y);
tt_cxy_pair umd_core = this->get_soc_desc(chip_id).convert_to_umd_coordinates(tt_cxy_pair(chip_id, core_desc.first.x, core_desc.first.y));
this->virtual_to_umd_coord_mapping_[virtual_core] = umd_core;
Expand All @@ -370,18 +371,19 @@ void Cluster::generate_logical_to_virtual_coord_mapping() {
this->worker_logical_to_virtual_y_.insert({board_type, {}});
this->eth_logical_to_virtual_.insert({board_type, {}});
for (auto x_coords : soc_desc.worker_log_to_routing_x) {
CoreCoord phys_core = soc_desc.get_physical_core_from_logical_core(CoreCoord(x_coords.first, 0), CoreType::WORKER);
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(chip_id, phys_core, CoreType::WORKER);
CoreCoord phys_core = soc_desc.get_physical_tensix_core_from_logical(CoreCoord(x_coords.first, 0));
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(chip_id, phys_core);
this->worker_logical_to_virtual_x_.at(board_type).insert({x_coords.first, virtual_coords.x});
}
for (auto y_coords : soc_desc.worker_log_to_routing_y) {
CoreCoord phys_core = soc_desc.get_physical_core_from_logical_core(CoreCoord(0, y_coords.first), CoreType::WORKER);
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(chip_id, phys_core, CoreType::WORKER);
CoreCoord phys_core = soc_desc.get_physical_tensix_core_from_logical(CoreCoord(0, y_coords.first));
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(chip_id, phys_core);
this->worker_logical_to_virtual_y_.at(board_type).insert({y_coords.first, virtual_coords.y});
}
for (std::size_t log_eth_core_y = 0; log_eth_core_y < soc_desc.physical_ethernet_cores.size(); log_eth_core_y++) {
CoreCoord logical_eth_core = {0, log_eth_core_y};
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(chip_id, soc_desc.physical_ethernet_cores.at(log_eth_core_y), CoreType::ETH);
CoreCoord virtual_coords = this->get_virtual_coordinate_from_physical_coordinates(
chip_id, soc_desc.physical_ethernet_cores.at(log_eth_core_y));
this->eth_logical_to_virtual_.at(board_type).insert({logical_eth_core, virtual_coords});
}
}
Expand All @@ -398,11 +400,10 @@ void Cluster::generate_virtual_to_profiler_flat_id_mapping() {
this->virtual_routing_to_profiler_flat_id_.insert({board_type, {}});
auto& soc_desc = this->get_soc_desc(chip_id);
for (const auto& core_to_profiler_id : soc_desc.physical_routing_to_profiler_flat_id) {
if (std::find(soc_desc.physical_workers.begin(), soc_desc.physical_workers.end(), core_to_profiler_id.first) != soc_desc.physical_workers.end()) {
this->virtual_routing_to_profiler_flat_id_.at(board_type).insert({this->get_virtual_coordinate_from_physical_coordinates(chip_id, core_to_profiler_id.first, CoreType::WORKER), core_to_profiler_id.second});
} else {
this->virtual_routing_to_profiler_flat_id_.at(board_type).insert({this->get_virtual_coordinate_from_physical_coordinates(chip_id, core_to_profiler_id.first, CoreType::ETH), core_to_profiler_id.second});
}
this->virtual_routing_to_profiler_flat_id_.at(board_type)
.insert(
{this->get_virtual_coordinate_from_physical_coordinates(chip_id, core_to_profiler_id.first),
core_to_profiler_id.second});
}
}
#endif
Expand Down Expand Up @@ -441,16 +442,11 @@ tt_cxy_pair Cluster::get_virtual_coordinate_from_logical_coordinates(tt_cxy_pair
auto xy_virtual_coord = this->get_virtual_coordinate_from_logical_coordinates(logical_coordinate.chip, CoreCoord(logical_coordinate.x, logical_coordinate.y), core_type);
return tt_cxy_pair(logical_coordinate.chip, xy_virtual_coord);
}
CoreCoord Cluster::get_virtual_coordinate_from_physical_coordinates(chip_id_t chip_id, CoreCoord physical_coord, const CoreType& core_type) const {
CoreCoord Cluster::get_virtual_coordinate_from_physical_coordinates(chip_id_t chip_id, CoreCoord physical_coord) const {
auto& soc_desc = this->get_soc_desc(chip_id);
if ((not (core_type == CoreType::WORKER or core_type == CoreType::ETH)) or this->target_type_ == TargetDevice::Simulator) {
return physical_coord;
}
tt_cxy_pair virtual_chip_coord = soc_desc.convert_to_umd_coordinates(tt_cxy_pair(chip_id, physical_coord.x, physical_coord.y));
std::size_t c = virtual_chip_coord.x;
std::size_t r = virtual_chip_coord.y;
this->driver_->translate_to_noc_table_coords(chip_id, r, c);
return CoreCoord{c, r};
tt::umd::CoreCoord translated_coord =
soc_desc.translate_coord_to(physical_coord, CoordSystem::PHYSICAL, CoordSystem::TRANSLATED);
return {translated_coord.x, translated_coord.y};
}

CoreCoord Cluster::get_logical_ethernet_core_from_virtual(chip_id_t chip, CoreCoord core) const {
Expand Down
Loading

0 comments on commit 3001628

Please sign in to comment.