Skip to content

Commit

Permalink
#16171: Preload kernels before receiving go message
Browse files Browse the repository at this point in the history
Add a flag that lets brisc.cc start loading kernels before receiving a go
message. Fast dispatch ensures that the flag will be set only after all
necessary program data is sent to the core.

This allows preparation for the following kernel (including loading NCRISC
IRAM, setting up CBs, and initializing local memory) to happen in parallel with
the round-trip to the dispatcher_s to sync up with the other kernels and ensure
that they're all launched at the same time.
  • Loading branch information
jbaumanTT committed Jan 13, 2025
1 parent 30f0824 commit 5d0bc7b
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 8 deletions.
14 changes: 12 additions & 2 deletions tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,14 @@ int main() {

WAYPOINT("GW");
uint8_t go_message_signal = RUN_MSG_DONE;
while ((go_message_signal = mailboxes->go_message.signal) != RUN_MSG_GO) {
// kernel_config.preload is last in the launch message, so other data is
// valid by the time it's set. All multicast data from the dispatcher is
// written in order, so it will arrive in order. We also have a barrier
// before mcasting the launch message (as a hang workaround), which
// ensures that the unicast data will also have been received.
while (
((go_message_signal = mailboxes->go_message.signal) != RUN_MSG_GO) &&
!(mailboxes->launch[mailboxes->launch_msg_rd_ptr].kernel_config.preload & DISPATCH_ENABLE_FLAG_PRELOAD)) {
invalidate_l1_cache();
// While the go signal for kernel execution is not sent, check if the worker was signalled
// to reset its launch message read pointer.
Expand Down Expand Up @@ -436,7 +443,8 @@ int main() {
volatile tt_reg_ptr uint32_t* cfg_regs = core.cfg_regs_base(0);
cfg_regs[RISCV_IC_INVALIDATE_InvalidateAll_ADDR32] = RISCV_IC_BRISC_MASK | RISCV_IC_TRISC_ALL_MASK | RISCV_IC_NCRISC_MASK;

enum dispatch_core_processor_masks enables = (enum dispatch_core_processor_masks)launch_msg_address->kernel_config.enables;
enum dispatch_core_processor_masks enables =
(enum dispatch_core_processor_masks)launch_msg_address->kernel_config.enables;

run_triscs(enables);

Expand Down Expand Up @@ -493,6 +501,7 @@ int main() {
uint32_t end_cb_index = launch_msg_address->kernel_config.min_remote_cb_start_index;
experimental::setup_remote_cb_interfaces<true>(cb_l1_base, end_cb_index);
}
wait_for_go_message();
}
WAYPOINT("D");

Expand All @@ -517,6 +526,7 @@ int main() {
if (launch_msg_address->kernel_config.mode == DISPATCH_MODE_DEV) {
// Set launch message to invalid, so that the next time this slot is encountered, kernels are only run if a valid launch message is sent.
launch_msg_address->kernel_config.enables = 0;
launch_msg_address->kernel_config.preload = 0;
uint64_t dispatch_addr = NOC_XY_ADDR(
NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y),
Expand Down
3 changes: 2 additions & 1 deletion tt_metal/hw/firmware/src/brisck.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#endif

void kernel_launch(uint32_t kernel_base_addr) {

#if defined(DEBUG_NULL_KERNELS) && !defined(DISPATCH_KERNEL)
wait_for_go_message();
#ifdef KERNEL_RUN_TIME
uint64_t end_time = c_tensix_core::read_wall_clock() + KERNEL_RUN_TIME;
while (c_tensix_core::read_wall_clock() < end_time);
Expand All @@ -40,6 +40,7 @@ void kernel_launch(uint32_t kernel_base_addr) {
#ifdef ALIGN_LOCAL_CBS_TO_REMOTE_CBS
ALIGN_LOCAL_CBS_TO_REMOTE_CBS
#endif
wait_for_go_message();
{
DeviceZoneScopedMainChildN("BRISC-KERNEL");
kernel_main();
Expand Down
7 changes: 6 additions & 1 deletion tt_metal/hw/firmware/src/ncrisck.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//
// SPDX-License-Identifier: Apache-2.0

#include <cstdint>

#include "risc_common.h"
#include "tensix.h"
#include "tensix_types.h"
Expand All @@ -28,8 +30,9 @@ uint32_t noc_nonposted_atomics_acked[NUM_NOCS];
uint32_t noc_posted_writes_num_issued[NUM_NOCS];

void kernel_launch(uint32_t kernel_base_addr) {
DeviceZoneScopedMainChildN("NCRISC-KERNEL");
#if defined(DEBUG_NULL_KERNELS) && !defined(DISPATCH_KERNEL)
wait_for_go_message();
DeviceZoneScopedMainChildN("NCRISC-KERNEL");
#ifdef KERNEL_RUN_TIME
uint64_t end_time = c_tensix_core::read_wall_clock() + KERNEL_RUN_TIME;
while (c_tensix_core::read_wall_clock() < KERNEL_RUN_TIME);
Expand All @@ -46,6 +49,8 @@ void kernel_launch(uint32_t kernel_base_addr) {
#ifdef ALIGN_LOCAL_CBS_TO_REMOTE_CBS
ALIGN_LOCAL_CBS_TO_REMOTE_CBS
#endif
wait_for_go_message();
DeviceZoneScopedMainChildN("NCRISC-KERNEL");
kernel_main();
if constexpr (NOC_MODE == DM_DEDICATED_NOC) {
WAYPOINT("NKFW");
Expand Down
8 changes: 5 additions & 3 deletions tt_metal/hw/firmware/src/trisck.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ volatile tt_reg_ptr uint * mailbox_base[4] = {
};
}

void kernel_launch(uint32_t kernel_base_addr)
{
DeviceZoneScopedMainChildN("TRISC-KERNEL");
void kernel_launch(uint32_t kernel_base_addr) {
#if defined(DEBUG_NULL_KERNELS) && !defined(DISPATCH_KERNEL)
wait_for_go_message();
DeviceZoneScopedMainChildN("TRISC-KERNEL");
#ifdef KERNEL_RUN_TIME
ckernel::wait(KERNEL_RUN_TIME);
#endif
Expand All @@ -57,6 +57,8 @@ void kernel_launch(uint32_t kernel_base_addr)
#if !defined(UCK_CHLKC_MATH) and defined ALIGN_LOCAL_CBS_TO_REMOTE_CBS
ALIGN_LOCAL_CBS_TO_REMOTE_CBS
#endif
wait_for_go_message();
DeviceZoneScopedMainChildN("TRISC-KERNEL");
run_kernel();
#endif
}
6 changes: 5 additions & 1 deletion tt_metal/hw/inc/dev_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ struct rta_offset_t {
// Maximums across all archs
constexpr auto NUM_PROGRAMMABLE_CORE_TYPES = 3u;
constexpr auto NUM_PROCESSORS_PER_CORE_TYPE = 5u;
// Bit flags stored in the single-byte kernel_config_msg_t::preload field.
enum dispatchenable_flags : uint8_t {
    // Most significant bit of the byte (0x80 == 1 << 7).
    DISPATCH_ENABLE_FLAG_PRELOAD = 0x80,
};

struct kernel_config_msg_t {
volatile uint16_t watcher_kernel_ids[DISPATCH_CLASS_MAX];
Expand All @@ -122,7 +125,8 @@ struct kernel_config_msg_t {
volatile uint8_t min_remote_cb_start_index;
volatile uint8_t exit_erisc_kernel;
volatile uint8_t enables;
volatile uint8_t pad2[9];
volatile uint8_t pad2[8];
volatile uint8_t preload; // Must be at end, so it's only written when all other data is written.
} __attribute__((packed));

struct go_msg_t {
Expand Down
11 changes: 11 additions & 0 deletions tt_metal/hw/inc/firmware_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,14 @@ uint32_t firmware_config_init(

return kernel_config_base[core_type_index];
}

// Busy-wait until the go message signal in the mailbox region at
// MEM_MAILBOX_BASE reads RUN_MSG_GO. Takes no arguments and returns nothing;
// it simply does not return until the signal has been observed.
FORCE_INLINE
void wait_for_go_message() {
    tt_l1_ptr mailboxes_t* const mbox = (tt_l1_ptr mailboxes_t*)(MEM_MAILBOX_BASE);

    for (;;) {
        // Re-read the signal on every iteration; leave as soon as it is GO.
        if (mbox->go_message.signal == RUN_MSG_GO) {
            return;
        }
#if defined(ARCH_BLACKHOLE) && !defined(DISABLE_L1_DATA_CACHE)
        // NOTE(review): presumably needed so the next poll observes fresh L1
        // contents when the Blackhole L1 data cache is enabled -- confirm.
        asm("fence");
#endif
    }
}
2 changes: 2 additions & 0 deletions tt_metal/impl/program/dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,7 @@ void assemble_device_commands(
uint32_t programmable_core_index = hal.get_programmable_core_type_index(HalProgrammableCoreType::TENSIX);
for (auto& kernel_group : program.get_kernel_groups(programmable_core_index)) {
kernel_group->launch_msg.kernel_config.mode = DISPATCH_MODE_DEV;
kernel_group->launch_msg.kernel_config.preload = DISPATCH_ENABLE_FLAG_PRELOAD;
for (uint32_t i = 0; i < NUM_PROGRAMMABLE_CORE_TYPES; i++) {
kernel_group->launch_msg.kernel_config.kernel_config_base[i] = 0;
}
Expand Down Expand Up @@ -1132,6 +1133,7 @@ void assemble_device_commands(
if (programmable_core_index != -1) {
for (auto& kernel_group : program.get_kernel_groups(programmable_core_index)) {
kernel_group->launch_msg.kernel_config.mode = DISPATCH_MODE_DEV;
kernel_group->launch_msg.kernel_config.preload = DISPATCH_ENABLE_FLAG_PRELOAD;
// Set the kernel_config_base addrs to 0 when generating the dispatch commands for the program
// Will be resolved at runtime
for (uint32_t i = 0; i < NUM_PROGRAMMABLE_CORE_TYPES; i++) {
Expand Down

0 comments on commit 5d0bc7b

Please sign in to comment.