Skip to content

Commit

Permalink
#0: add inf/eps/nan into HAL
Browse files Browse the repository at this point in the history
  • Loading branch information
cfjchu committed Jan 9, 2025
1 parent 33fda08 commit d46776a
Show file tree
Hide file tree
Showing 17 changed files with 162 additions and 219 deletions.
5 changes: 0 additions & 5 deletions tests/end_to_end_tests/test_host_side_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@ def test_global_var_toggle_and_device_eps():
ttnn.device.DisablePersistentKernelCache()
ttnn.device.EnableCompilationReports()
ttnn.device.DisableCompilationReports()
# Check that the tt_lib bindings take the correct path
# to device epsilon constants
assert ttnn.device.EPS_GS == 0.001953125
assert ttnn.device.EPS_WHB0 == 1.1920899822825959e-07
assert ttnn.device.EPS_BH == 1.1920899822825959e-07


@pytest.mark.eager_host_side
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,3 @@ def test_run_stats_ops(self, input_shapes_and_pcc, fn_kind, device, function_lev
run_single_pytorch_test(
f"stats-{fn_kind}", input_shapes, datagen_func, comparison_func, device, test_args, ttnn_op=is_ttnn_op
)


class TestEPS:
def test_basic_gs(self):
assert ttnn.device.EPS_GS == 0.001953125

def test_basic_whb0(self):
assert np.isclose(ttnn.device.EPS_WHB0, 1.19209e-07)

def test_basic_bh(self):
assert np.isclose(ttnn.device.EPS_BH, 1.19209e-07)
12 changes: 3 additions & 9 deletions tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,12 @@
from ttnn.device import Arch


def test_run_sfpu_attr(device):
assert ttnn.device.EPS_GS == 0.001953125
assert ttnn.device.EPS_WHB0 == 1.1920899822825959e-07
assert ttnn.device.EPS_BH == 1.1920899822825959e-07


def test_run_sfpu_eps(device):
shape = [1, 1, 32, 32]
eps_mapping = {
Arch.GRAYSKULL: ttnn.device.EPS_GS,
Arch.WORMHOLE_B0: ttnn.device.EPS_WHB0,
Arch.BLACKHOLE: ttnn.device.EPS_BH,
Arch.GRAYSKULL: 0.001953125,
Arch.WORMHOLE_B0: 1.1920899822825959e-07,
Arch.BLACKHOLE: 1.1920899822825959e-07,
}
value = eps_mapping[device.arch()]
assert np.isclose(value, device.sfpu_eps())
Expand Down
6 changes: 6 additions & 0 deletions tt_metal/experimental/hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,10 @@ uint32_t get_erisc_l1_unreserved_size() {
return 0;
}

float get_eps() { return HalSingleton::getInstance().get_eps(); }

float get_nan() { return HalSingleton::getInstance().get_nan(); }

float get_inf() { return HalSingleton::getInstance().get_inf(); }

} // namespace tt::tt_metal::experimental::hal
21 changes: 21 additions & 0 deletions tt_metal/experimental/hal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,25 @@ uint32_t get_erisc_l1_unreserved_base();
*/
uint32_t get_erisc_l1_unreserved_size();

/**
* @brief Uses the hardware abstraction layer to fetch the representable epsilon value.
*
* @return SFPU epsilon value
*/
float get_eps();

/**
* @brief Uses the hardware abstraction layer to fetch the representable NaN value.
*
* @return SFPU NaN value
*/
float get_nan();

/**
* @brief Uses the hardware abstraction layer to fetch the representable Infinity value.
*
* @return SFPU Infinity value
*/
float get_inf();

} // namespace tt::tt_metal::experimental::hal
44 changes: 0 additions & 44 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1491,44 +1491,6 @@ std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address(tt::stl::Sp
}
}

float Device::sfpu_eps() const {
switch (arch()) {
case tt::ARCH::GRAYSKULL: return tt::tt_metal::EPS_GS;
case tt::ARCH::WORMHOLE_B0: return tt::tt_metal::EPS_WHB0;
case tt::ARCH::BLACKHOLE: return tt::tt_metal::EPS_BH;
default: return std::numeric_limits<float>::epsilon();
}

return std::numeric_limits<float>::epsilon();
}

float Device::sfpu_nan() const {
switch (arch()) {
case tt::ARCH::GRAYSKULL: return tt::tt_metal::NAN_GS;
case tt::ARCH::WORMHOLE_B0: return tt::tt_metal::NAN_WHB0;
case tt::ARCH::BLACKHOLE: return tt::tt_metal::NAN_BH;
default: return std::numeric_limits<float>::quiet_NaN();
}

return std::numeric_limits<float>::quiet_NaN();
}

// machine inf
float Device::sfpu_inf() const{

switch (arch()) {
case tt::ARCH::GRAYSKULL:
return tt::tt_metal::INF_GS;
case tt::ARCH::WORMHOLE_B0:
return tt::tt_metal::INF_WHB0;
case tt::ARCH::BLACKHOLE:
return tt::tt_metal::INF_BH;
default:
return std::numeric_limits<float>::infinity();
}
return std::numeric_limits<float>::infinity();
}

std::pair<int, int> Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const {
TT_ASSERT(programmable_core < this->build_state_indices_.size(),
"Programmable core type {} is not included in the FW or Kernel build state", programmable_core);
Expand Down Expand Up @@ -2011,12 +1973,6 @@ tt::stl::Span<const std::uint32_t> v1::BankIdsFromLogicalCore(
return device->bank_ids_from_logical_core(buffer_type, logical_core);
}

float v1::GetSfpuEps(IDevice* device) { return device->sfpu_eps(); }

float v1::GetSfpuNan(IDevice* device) { return device->sfpu_nan(); }

float v1::GetSfpuInf(IDevice* device) { return device->sfpu_inf(); }

std::size_t v1::GetNumProgramCacheEntries(IDevice* device) { return device->num_program_cache_entries(); }

} // namespace tt_metal
Expand Down
9 changes: 0 additions & 9 deletions tt_metal/impl/device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,6 @@ class Device : public IDevice {
uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& core) const override;
uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& cores) const override;

// machine epsilon
float sfpu_eps() const override;

// machine nan
float sfpu_nan() const override;

// machine inf
float sfpu_inf() const override;

const JitBuildEnv& build_env() const override { return this->build_env_; }
const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const override;
const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const override;
Expand Down
24 changes: 0 additions & 24 deletions tt_metal/include/tt_metal/deprecated/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,30 +212,6 @@ std::int32_t GetBankOffset(IDevice* device, BufferType buffer_type, std::uint32_
stl::Span<const std::uint32_t> BankIdsFromLogicalCore(
IDevice* device, BufferType buffer_type, CoreCoord logical_core);

/**
* @brief Retrieves the machine epsilon for the SFPU on the device.
*
* @param device The device to query.
* @return SFPU machine epsilon.
*/
float GetSfpuEps(IDevice* device);

/**
* @brief Retrieves the representation of NaN for the SFPU on the device.
*
* @param device The device to query.
* @return SFPU NaN value.
*/
float GetSfpuNan(IDevice* device);

/**
* @brief Retrieves the representation of infinity for the SFPU on the device.
*
* @param device The device to query.
* @return SFPU infinity value.
*/
float GetSfpuInf(IDevice* device);

/**
* @brief Retrieves the current worker mode of the device.
*
Expand Down
17 changes: 0 additions & 17 deletions tt_metal/include/tt_metal/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,6 @@ class JitBuildEnv;
class HWCommandQueue;
class TraceBuffer;

// TODO: These should be moved into arch specific host files that get exported here
static constexpr float EPS_GS = 0.001953125f;
static constexpr float EPS_WHB0 = 1.19209e-7f;
static constexpr float EPS_BH = EPS_WHB0;

static constexpr float NAN_GS = 6.9752e19;
static constexpr float NAN_WHB0 = 7.0040e+19;
static constexpr float NAN_BH = NAN_WHB0;

static constexpr float INF_GS = 1.6948e38;
static constexpr float INF_WHB0 = 1.7014e+38;
static constexpr float INF_BH = INF_WHB0;

inline namespace v0 {

class IDevice {
Expand Down Expand Up @@ -171,10 +158,6 @@ class IDevice {
virtual uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& core) const = 0;
virtual uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& cores) const = 0;

virtual float sfpu_eps() const = 0;
virtual float sfpu_nan() const = 0;
virtual float sfpu_inf() const = 0;

virtual const JitBuildEnv& build_env() const = 0;
virtual const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const = 0;
virtual const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const = 0;
Expand Down
8 changes: 8 additions & 0 deletions tt_metal/llrt/blackhole/bh_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
constexpr static std::uint32_t DRAM_BARRIER_SIZE =
((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;

static constexpr float EPS_BH = 1.19209e-7f;
static constexpr float NAN_BH = 7.0040e+19;
static constexpr float INF_BH = 1.7014e+38;

namespace tt {

namespace tt_metal {
Expand Down Expand Up @@ -81,6 +85,10 @@ void Hal::initialize_bh() {
this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;

this->eps_ = EPS_BH;
this->nan_ = NAN_BH;
this->inf_ = INF_BH;
}

} // namespace tt_metal
Expand Down
8 changes: 8 additions & 0 deletions tt_metal/llrt/grayskull/gs_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
constexpr static std::uint32_t DRAM_BARRIER_SIZE =
((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;

static constexpr float EPS_GS = 0.001953125f;
static constexpr float NAN_GS = 6.9752e19;
static constexpr float INF_GS = 1.6948e38;

namespace tt {

namespace tt_metal {
Expand Down Expand Up @@ -165,6 +169,10 @@ void Hal::initialize_gs() {
this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;

this->eps_ = EPS_GS;
this->nan_ = NAN_GS;
this->inf_ = INF_GS;
}

} // namespace tt_metal
Expand Down
8 changes: 8 additions & 0 deletions tt_metal/llrt/hal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ class Hal {
uint32_t virtual_worker_start_x_;
uint32_t virtual_worker_start_y_;

float eps_;
float nan_;
float inf_;

void initialize_gs();
void initialize_wh();
void initialize_bh();
Expand All @@ -171,6 +175,10 @@ class Hal {

uint32_t get_num_nocs() const { return num_nocs_; }

float get_eps() const { return eps_; }
float get_nan() const { return nan_; }
float get_inf() const { return inf_; }

template <typename IndexType, typename SizeType, typename CoordType>
auto noc_coordinate(IndexType noc_index, SizeType noc_size, CoordType coord) const
-> decltype(noc_size - 1 - coord) {
Expand Down
8 changes: 8 additions & 0 deletions tt_metal/llrt/wormhole/wh_hal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
constexpr static std::uint32_t DRAM_BARRIER_SIZE =
((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;

static constexpr float EPS_WHB0 = 1.19209e-7f;
static constexpr float NAN_WHB0 = 7.0040e+19;
static constexpr float INF_WHB0 = 1.7014e+38;

namespace tt {

namespace tt_metal {
Expand Down Expand Up @@ -82,6 +86,10 @@ void Hal::initialize_wh() {
this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;

this->eps_ = EPS_WHB0;
this->nan_ = NAN_WHB0;
this->inf_ = INF_WHB0;
}

} // namespace tt_metal
Expand Down
Loading

0 comments on commit d46776a

Please sign in to comment.