Skip to content

Commit

Permalink
#0: separate dispatch constants
Browse files Browse the repository at this point in the history
- Rename dispatch_constants to DispatchMemMap
- Delete old constants. New constants in DispatchConstants
- DispatchMemMap to be configured by DispatchSettings
  • Loading branch information
nhuang-tt committed Jan 29, 2025
1 parent 5ebd7d8 commit 59e073a
Show file tree
Hide file tree
Showing 33 changed files with 404 additions and 417 deletions.
2 changes: 1 addition & 1 deletion tests/tt_metal/tt_metal/device/test_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ TEST_F(DeviceFixture, TensixTestL1ToPCIeAt16BAlignedAddress) {
uint32_t base_l1_src_address = device->get_base_allocator_addr(HalMemType::L1) + hal.get_alignment(HalMemType::L1);
// This is a slow dispatch test; a dispatch core type is needed to query DispatchMemMap (formerly dispatch_constants)
uint32_t base_pcie_dst_address =
dispatch_constants::get(CoreType::WORKER).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED) +
DispatchMemMap::get(CoreType::WORKER).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED) +
hal.get_alignment(HalMemType::L1);

uint32_t size_bytes = 2048 * 128;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ void test_EnqueueWriteBuffer_and_EnqueueReadBuffer(IDevice* device, CommandQueue
uint32_t cq_size = device->sysmem_manager().get_cq_size();
CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
uint32_t cq_start =
dispatch_constants::get(dispatch_core_type).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);
DispatchMemMap::get(dispatch_core_type).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);

std::vector<uint32_t> cq_zeros((cq_size - cq_start) / sizeof(uint32_t), 0);

Expand Down Expand Up @@ -549,7 +549,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestPageLargerThanAndUnalignedToTran
for (IDevice* device : devices_) {
TestBufferConfig config = {
.num_pages = num_round_robins * (device->num_banks(BufferType::DRAM)),
.page_size = dispatch_constants::TRANSFER_PAGE_SIZE + 32,
.page_size = DispatchConstants::TRANSFER_PAGE_SIZE + 32,
.buftype = BufferType::DRAM};
local_test_functions::test_EnqueueWriteBuffer_and_EnqueueReadBuffer(device, device->command_queue(), config);
}
Expand All @@ -559,8 +559,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestPageLargerThanMaxPrefetchCommand
constexpr uint32_t num_round_robins = 1;
for (IDevice* device : devices_) {
CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
const uint32_t max_prefetch_command_size =
dispatch_constants::get(dispatch_core_type).max_prefetch_command_size();
const uint32_t max_prefetch_command_size = DispatchMemMap::get(dispatch_core_type).max_prefetch_command_size();
TestBufferConfig config = {
.num_pages = 1, .page_size = max_prefetch_command_size + 2048, .buftype = BufferType::DRAM};
local_test_functions::test_EnqueueWriteBuffer_and_EnqueueReadBuffer(device, device->command_queue(), config);
Expand All @@ -571,8 +570,7 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestUnalignedPageLargerThanMaxPrefet
constexpr uint32_t num_round_robins = 1;
for (IDevice* device : devices_) {
CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
const uint32_t max_prefetch_command_size =
dispatch_constants::get(dispatch_core_type).max_prefetch_command_size();
const uint32_t max_prefetch_command_size = DispatchMemMap::get(dispatch_core_type).max_prefetch_command_size();
uint32_t unaligned_page_size = max_prefetch_command_size + 4;
TestBufferConfig config = {.num_pages = 1, .page_size = unaligned_page_size, .buftype = BufferType::DRAM};
local_test_functions::test_EnqueueWriteBuffer_and_EnqueueReadBuffer(device, device->command_queue(), config);
Expand Down Expand Up @@ -613,8 +611,8 @@ TEST_F(CommandQueueSingleCardBufferFixture, TestWrapHostHugepageOnEnqueueReadBuf
uint32_t page_size = 2048;
uint32_t command_issue_region_size = device->sysmem_manager().get_issue_queue_size(0);
CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
uint32_t cq_start = dispatch_constants::get(dispatch_core_type)
.get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);
uint32_t cq_start =
DispatchMemMap::get(dispatch_core_type).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);

uint32_t max_command_size = command_issue_region_size - cq_start;
uint32_t buffer = 14240;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ TEST_F(MultiCommandQueueSingleDeviceEventFixture, TestEventsEnqueueWaitForEventC
for (uint cq_id = 0; cq_id < cqs.size(); cq_id++) {
for (size_t i = 0; i < num_cmds_per_cq * num_events_per_cq; i++) {
uint32_t host_addr =
completion_queue_base[cq_id] + i * dispatch_constants::TRANSFER_PAGE_SIZE + sizeof(CQDispatchCmd);
completion_queue_base[cq_id] + i * DispatchConstants::TRANSFER_PAGE_SIZE + sizeof(CQDispatchCmd);
tt::Cluster::instance().read_sysmem(&event, 4, host_addr, mmio_device_id, channel);
log_debug(
tt::LogTest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using std::vector;
using namespace tt::tt_metal;

constexpr uint32_t completion_queue_event_offset = sizeof(CQDispatchCmd);
constexpr uint32_t completion_queue_page_size = dispatch_constants::TRANSFER_PAGE_SIZE;
constexpr uint32_t completion_queue_page_size = DispatchConstants::TRANSFER_PAGE_SIZE;

enum class DataMovementMode : uint8_t { WRITE = 0, READ = 1 };

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ int main(int argc, char** argv) {
uint32_t host_write_ptr = 0;

CoreType dispatch_core_type = dispatch_core_manager::instance().get_dispatch_core_type(device_id);
uint32_t prefetch_q_base = dispatch_constants::get(dispatch_core_type)
uint32_t prefetch_q_base = DispatchMemMap::get(dispatch_core_type)
.get_device_command_queue_addr(CommandQueueDeviceAddrType::UNRESERVED);

uint32_t reg_addr = prefetch_q_base;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,8 @@ int main(int argc, char** argv) {
vec.resize(page_size_g / sizeof(uint32_t));

CoreType core_type = dispatch_core_manager::instance().get_dispatch_core_type(device->id());
uint32_t dispatch_l1_unreserved_base = dispatch_constants::get(core_type).get_device_command_queue_addr(
CommandQueueDeviceAddrType::UNRESERVED);
uint32_t dispatch_l1_unreserved_base =
DispatchMemMap::get(core_type).get_device_command_queue_addr(CommandQueueDeviceAddrType::UNRESERVED);
for (int i = 0; i < warmup_iterations_g; i++) {
if (source_mem_g == 4) {
tt::Cluster::instance().read_core(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -436,8 +436,7 @@ int main(int argc, char** argv) {

// Want different buffers on each core, instead use big buffer and self-manage it
uint32_t dispatch_l1_unreserved_base =
dispatch_constants::get(CoreType::WORKER)
.get_device_command_queue_addr(CommandQueueDeviceAddrType::UNRESERVED);
DispatchMemMap::get(CoreType::WORKER).get_device_command_queue_addr(CommandQueueDeviceAddrType::UNRESERVED);
uint32_t l1_buf_base = tt::align(dispatch_l1_unreserved_base, dispatch_buffer_page_size_g);
TT_ASSERT((l1_buf_base & (dispatch_buffer_page_size_g - 1)) == 0);

Expand Down Expand Up @@ -526,13 +525,13 @@ int main(int argc, char** argv) {
const uint32_t prefetch_sync_sem = spoof_prefetch_core_sem_1_id;

const uint32_t host_completion_queue_wr_ptr =
dispatch_constants::get(CoreType::WORKER)
DispatchMemMap::get(CoreType::WORKER)
.get_host_command_queue_addr(CommandQueueHostAddrType::COMPLETION_Q_WR);
const uint32_t dev_completion_queue_wr_ptr =
dispatch_constants::get(CoreType::WORKER)
DispatchMemMap::get(CoreType::WORKER)
.get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
const uint32_t dev_completion_queue_rd_ptr =
dispatch_constants::get(CoreType::WORKER)
DispatchMemMap::get(CoreType::WORKER)
.get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);

std::vector<uint32_t> dispatch_compile_args = {
Expand All @@ -558,8 +557,8 @@ int main(int argc, char** argv) {
0, // prefetch_downstream_buffer_pages
num_compute_cores, // max_write_packed_cores
0,
dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
dispatch_constants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES,
DispatchConstants::DISPATCH_MESSAGE_ENTRIES,
DispatchConstants::DISPATCH_GO_SIGNAL_NOC_DATA_ENTRIES,
0,
0,
0,
Expand Down
Loading

0 comments on commit 59e073a

Please sign in to comment.