#0: add inf/eps/nan into HAL

tenstorrent · Jan 9, 2025 · d46776a · d46776a
1 parent 33fda08
commit d46776a
Show file tree

Hide file tree

Showing 17 changed files with 162 additions and 219 deletions.
diff --git a/tests/end_to_end_tests/test_host_side_api.py b/tests/end_to_end_tests/test_host_side_api.py
@@ -15,11 +15,6 @@ def test_global_var_toggle_and_device_eps():
     ttnn.device.DisablePersistentKernelCache()
     ttnn.device.EnableCompilationReports()
     ttnn.device.DisableCompilationReports()
-    # Check that the tt_lib bindings take the correct path
-    # to device epsilon constants
-    assert ttnn.device.EPS_GS == 0.001953125
-    assert ttnn.device.EPS_WHB0 == 1.1920899822825959e-07
-    assert ttnn.device.EPS_BH == 1.1920899822825959e-07
 
 
 @pytest.mark.eager_host_side

diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_stats.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_stats.py
@@ -82,14 +82,3 @@ def test_run_stats_ops(self, input_shapes_and_pcc, fn_kind, device, function_lev
         run_single_pytorch_test(
             f"stats-{fn_kind}", input_shapes, datagen_func, comparison_func, device, test_args, ttnn_op=is_ttnn_op
         )
-
-
-class TestEPS:
-    def test_basic_gs(self):
-        assert ttnn.device.EPS_GS == 0.001953125
-
-    def test_basic_whb0(self):
-        assert np.isclose(ttnn.device.EPS_WHB0, 1.19209e-07)
-
-    def test_basic_bh(self):
-        assert np.isclose(ttnn.device.EPS_BH, 1.19209e-07)
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py
@@ -11,18 +11,12 @@
 from ttnn.device import Arch
 
 
-def test_run_sfpu_attr(device):
-    assert ttnn.device.EPS_GS == 0.001953125
-    assert ttnn.device.EPS_WHB0 == 1.1920899822825959e-07
-    assert ttnn.device.EPS_BH == 1.1920899822825959e-07
-
-
 def test_run_sfpu_eps(device):
     shape = [1, 1, 32, 32]
     eps_mapping = {
-        Arch.GRAYSKULL: ttnn.device.EPS_GS,
-        Arch.WORMHOLE_B0: ttnn.device.EPS_WHB0,
-        Arch.BLACKHOLE: ttnn.device.EPS_BH,
+        Arch.GRAYSKULL: 0.001953125,
+        Arch.WORMHOLE_B0: 1.1920899822825959e-07,
+        Arch.BLACKHOLE: 1.1920899822825959e-07,
     }
     value = eps_mapping[device.arch()]
     assert np.isclose(value, device.sfpu_eps())

diff --git a/tt_metal/experimental/hal.cpp b/tt_metal/experimental/hal.cpp
@@ -41,4 +41,10 @@ uint32_t get_erisc_l1_unreserved_size() {
     return 0;
 }
 
+float get_eps() { return HalSingleton::getInstance().get_eps(); }
+
+float get_nan() { return HalSingleton::getInstance().get_nan(); }
+
+float get_inf() { return HalSingleton::getInstance().get_inf(); }
+
 }  // namespace tt::tt_metal::experimental::hal
diff --git a/tt_metal/experimental/hal.hpp b/tt_metal/experimental/hal.hpp
@@ -52,4 +52,25 @@ uint32_t get_erisc_l1_unreserved_base();
  */
 uint32_t get_erisc_l1_unreserved_size();
 
+/**
+ * @brief Uses the hardware abstraction layer to fetch the representable epsilon value.
+ *
+ * @return SFPU epsilon value
+ */
+float get_eps();
+
+/**
+ * @brief Uses the hardware abstraction layer to fetch the representable NaN value.
+ *
+ * @return SFPU NaN value
+ */
+float get_nan();
+
+/**
+ * @brief Uses the hardware abstraction layer to fetch the representable Infinity value.
+ *
+ * @return SFPU Infinity value
+ */
+float get_inf();
+
 }  // namespace tt::tt_metal::experimental::hal
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
@@ -1491,44 +1491,6 @@ std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address(tt::stl::Sp
     }
 }
 
-float Device::sfpu_eps() const {
-    switch (arch()) {
-        case tt::ARCH::GRAYSKULL: return tt::tt_metal::EPS_GS;
-        case tt::ARCH::WORMHOLE_B0: return tt::tt_metal::EPS_WHB0;
-        case tt::ARCH::BLACKHOLE: return tt::tt_metal::EPS_BH;
-        default: return std::numeric_limits<float>::epsilon();
-    }
-
-    return std::numeric_limits<float>::epsilon();
-}
-
-float Device::sfpu_nan() const {
-    switch (arch()) {
-        case tt::ARCH::GRAYSKULL: return tt::tt_metal::NAN_GS;
-        case tt::ARCH::WORMHOLE_B0: return tt::tt_metal::NAN_WHB0;
-        case tt::ARCH::BLACKHOLE: return tt::tt_metal::NAN_BH;
-        default: return std::numeric_limits<float>::quiet_NaN();
-    }
-
-    return std::numeric_limits<float>::quiet_NaN();
-}
-
-// machine inf
-float Device::sfpu_inf() const{
-
-    switch (arch()) {
-        case tt::ARCH::GRAYSKULL:
-            return tt::tt_metal::INF_GS;
-        case tt::ARCH::WORMHOLE_B0:
-            return tt::tt_metal::INF_WHB0;
-        case tt::ARCH::BLACKHOLE:
-            return tt::tt_metal::INF_BH;
-        default:
-            return std::numeric_limits<float>::infinity();
-    }
-    return std::numeric_limits<float>::infinity();
-}
-
 std::pair<int, int> Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const {
     TT_ASSERT(programmable_core < this->build_state_indices_.size(),
         "Programmable core type {} is not included in the FW or Kernel build state", programmable_core);
@@ -2011,12 +1973,6 @@ tt::stl::Span<const std::uint32_t> v1::BankIdsFromLogicalCore(
     return device->bank_ids_from_logical_core(buffer_type, logical_core);
 }
 
-float v1::GetSfpuEps(IDevice* device) { return device->sfpu_eps(); }
-
-float v1::GetSfpuNan(IDevice* device) { return device->sfpu_nan(); }
-
-float v1::GetSfpuInf(IDevice* device) { return device->sfpu_inf(); }
-
 std::size_t v1::GetNumProgramCacheEntries(IDevice* device) { return device->num_program_cache_entries(); }
 
 }  // namespace tt_metal

diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
@@ -157,15 +157,6 @@ class Device : public IDevice {
     uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& core) const override;
     uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& cores) const override;
 
-    // machine epsilon
-    float sfpu_eps() const override;
-
-    // machine nan
-    float sfpu_nan() const override;
-
-    // machine inf
-    float sfpu_inf() const override;
-
     const JitBuildEnv& build_env() const override { return this->build_env_; }
     const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const override;
     const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const override;

diff --git a/tt_metal/include/tt_metal/deprecated/device.hpp b/tt_metal/include/tt_metal/deprecated/device.hpp
@@ -212,30 +212,6 @@ std::int32_t GetBankOffset(IDevice* device, BufferType buffer_type, std::uint32_
 stl::Span<const std::uint32_t> BankIdsFromLogicalCore(
     IDevice* device, BufferType buffer_type, CoreCoord logical_core);
 
-/**
- * @brief Retrieves the machine epsilon for the SFPU on the device.
- *
- * @param device The device to query.
- * @return SFPU machine epsilon.
- */
-float GetSfpuEps(IDevice* device);
-
-/**
- * @brief Retrieves the representation of NaN for the SFPU on the device.
- *
- * @param device The device to query.
- * @return SFPU NaN value.
- */
-float GetSfpuNan(IDevice* device);
-
-/**
- * @brief Retrieves the representation of infinity for the SFPU on the device.
- *
- * @param device The device to query.
- * @return SFPU infinity value.
- */
-float GetSfpuInf(IDevice* device);
-
 /**
  * @brief Retrieves the current worker mode of the device.
  *

diff --git a/tt_metal/include/tt_metal/device.hpp b/tt_metal/include/tt_metal/device.hpp
@@ -41,19 +41,6 @@ class JitBuildEnv;
 class HWCommandQueue;
 class TraceBuffer;
 
-// TODO: These should be moved into arch specific host files that get exported here
-static constexpr float  EPS_GS = 0.001953125f;
-static constexpr float  EPS_WHB0 = 1.19209e-7f;
-static constexpr float  EPS_BH = EPS_WHB0;
-
-static constexpr float  NAN_GS = 6.9752e19;
-static constexpr float  NAN_WHB0 = 7.0040e+19;
-static constexpr float  NAN_BH = NAN_WHB0;
-
-static constexpr float  INF_GS = 1.6948e38;
-static constexpr float  INF_WHB0 = 1.7014e+38;
-static constexpr float  INF_BH = INF_WHB0;
-
 inline namespace v0 {
 
 class IDevice {
@@ -171,10 +158,6 @@ class IDevice {
     virtual uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& core) const = 0;
     virtual uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& cores) const = 0;
 
-    virtual float sfpu_eps() const = 0;
-    virtual float sfpu_nan() const = 0;
-    virtual float sfpu_inf() const = 0;
-
     virtual const JitBuildEnv& build_env() const = 0;
     virtual const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const = 0;
     virtual const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const = 0;

diff --git a/tt_metal/llrt/blackhole/bh_hal.cpp b/tt_metal/llrt/blackhole/bh_hal.cpp
@@ -21,6 +21,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
 constexpr static std::uint32_t DRAM_BARRIER_SIZE =
     ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
 
+static constexpr float EPS_BH = 1.19209e-7f;
+static constexpr float NAN_BH = 7.0040e+19;
+static constexpr float INF_BH = 1.7014e+38;
+
 namespace tt {
 
 namespace tt_metal {
@@ -81,6 +85,10 @@ void Hal::initialize_bh() {
     this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
     this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
     this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;
+
+    this->eps_ = EPS_BH;
+    this->nan_ = NAN_BH;
+    this->inf_ = INF_BH;
 }
 
 }  // namespace tt_metal

diff --git a/tt_metal/llrt/grayskull/gs_hal.cpp b/tt_metal/llrt/grayskull/gs_hal.cpp
@@ -33,6 +33,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
 constexpr static std::uint32_t DRAM_BARRIER_SIZE =
     ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
 
+static constexpr float EPS_GS = 0.001953125f;
+static constexpr float NAN_GS = 6.9752e19;
+static constexpr float INF_GS = 1.6948e38;
+
 namespace tt {
 
 namespace tt_metal {
@@ -165,6 +169,10 @@ void Hal::initialize_gs() {
     this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
     this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
     this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;
+
+    this->eps_ = EPS_GS;
+    this->nan_ = NAN_GS;
+    this->inf_ = INF_GS;
 }
 
 }  // namespace tt_metal

diff --git a/tt_metal/llrt/hal.hpp b/tt_metal/llrt/hal.hpp
@@ -154,6 +154,10 @@ class Hal {
     uint32_t virtual_worker_start_x_;
     uint32_t virtual_worker_start_y_;
 
+    float eps_;
+    float nan_;
+    float inf_;
+
     void initialize_gs();
     void initialize_wh();
     void initialize_bh();
@@ -171,6 +175,10 @@ class Hal {
 
     uint32_t get_num_nocs() const { return num_nocs_; }
 
+    float get_eps() const { return eps_; }
+    float get_nan() const { return nan_; }
+    float get_inf() const { return inf_; }
+
     template <typename IndexType, typename SizeType, typename CoordType>
     auto noc_coordinate(IndexType noc_index, SizeType noc_size, CoordType coord) const
         -> decltype(noc_size - 1 - coord) {

diff --git a/tt_metal/llrt/wormhole/wh_hal.cpp b/tt_metal/llrt/wormhole/wh_hal.cpp
@@ -21,6 +21,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
 constexpr static std::uint32_t DRAM_BARRIER_SIZE =
     ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
 
+static constexpr float EPS_WHB0 = 1.19209e-7f;
+static constexpr float NAN_WHB0 = 7.0040e+19;
+static constexpr float INF_WHB0 = 1.7014e+38;
+
 namespace tt {
 
 namespace tt_metal {
@@ -82,6 +86,10 @@ void Hal::initialize_wh() {
     this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
     this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
     this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;
+
+    this->eps_ = EPS_WHB0;
+    this->nan_ = NAN_WHB0;
+    this->inf_ = INF_WHB0;
 }
 
 }  // namespace tt_metal