diff --git a/tests/end_to_end_tests/test_host_side_api.py b/tests/end_to_end_tests/test_host_side_api.py index a6ff4a0e2f3..36a3743b6ff 100644 --- a/tests/end_to_end_tests/test_host_side_api.py +++ b/tests/end_to_end_tests/test_host_side_api.py @@ -15,11 +15,6 @@ def test_global_var_toggle_and_device_eps(): ttnn.device.DisablePersistentKernelCache() ttnn.device.EnableCompilationReports() ttnn.device.DisableCompilationReports() - # Check that the tt_lib bindings take the correct path - # to device epsilon constants - assert ttnn.device.EPS_GS == 0.001953125 - assert ttnn.device.EPS_WHB0 == 1.1920899822825959e-07 - assert ttnn.device.EPS_BH == 1.1920899822825959e-07 @pytest.mark.eager_host_side diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_stats.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_stats.py index 3f731c1f119..417f25c1950 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_stats.py +++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_stats.py @@ -82,14 +82,3 @@ def test_run_stats_ops(self, input_shapes_and_pcc, fn_kind, device, function_lev run_single_pytorch_test( f"stats-{fn_kind}", input_shapes, datagen_func, comparison_func, device, test_args, ttnn_op=is_ttnn_op ) - - -class TestEPS: - def test_basic_gs(self): - assert ttnn.device.EPS_GS == 0.001953125 - - def test_basic_whb0(self): - assert np.isclose(ttnn.device.EPS_WHB0, 1.19209e-07) - - def test_basic_bh(self): - assert np.isclose(ttnn.device.EPS_BH, 1.19209e-07) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py index 27045816934..caac3eba6aa 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py @@ -11,18 +11,12 @@ from ttnn.device import Arch -def test_run_sfpu_attr(device): - assert ttnn.device.EPS_GS == 0.001953125 - assert ttnn.device.EPS_WHB0 == 1.1920899822825959e-07 - assert ttnn.device.EPS_BH == 1.1920899822825959e-07 - - def test_run_sfpu_eps(device): shape = [1, 1, 32, 32] eps_mapping = { - Arch.GRAYSKULL: ttnn.device.EPS_GS, - Arch.WORMHOLE_B0: ttnn.device.EPS_WHB0, - Arch.BLACKHOLE: ttnn.device.EPS_BH, + Arch.GRAYSKULL: 0.001953125, + Arch.WORMHOLE_B0: 1.1920899822825959e-07, + Arch.BLACKHOLE: 1.1920899822825959e-07, } value = eps_mapping[device.arch()] assert np.isclose(value, device.sfpu_eps()) diff --git a/tt_metal/experimental/hal.cpp b/tt_metal/experimental/hal.cpp index c748f34e2ed..007a1c1c682 100644 --- a/tt_metal/experimental/hal.cpp +++ b/tt_metal/experimental/hal.cpp @@ -41,4 +41,10 @@ uint32_t get_erisc_l1_unreserved_size() { return 0; } +float get_eps() { return HalSingleton::getInstance().get_eps(); } + +float get_nan() { return HalSingleton::getInstance().get_nan(); } + +float get_inf() { return HalSingleton::getInstance().get_inf(); } + } // namespace tt::tt_metal::experimental::hal diff --git a/tt_metal/experimental/hal.hpp b/tt_metal/experimental/hal.hpp index 3d9b4108913..223ab493370 100644 --- a/tt_metal/experimental/hal.hpp +++ b/tt_metal/experimental/hal.hpp @@ -52,4 +52,25 @@ uint32_t get_erisc_l1_unreserved_base(); */ uint32_t get_erisc_l1_unreserved_size(); +/** + * @brief Uses the hardware abstraction layer to fetch the representable epsilon value. + * + * @return SFPU epsilon value + */ +float get_eps(); + +/** + * @brief Uses the hardware abstraction layer to fetch the representable NaN value. 
+ *
+ * @return SFPU NaN value
+ */
+float get_nan();
+
+/**
+ * @brief Uses the hardware abstraction layer to fetch the representable Infinity value.
+ *
+ * @return SFPU Infinity value
+ */
+float get_inf();
+
 } // namespace tt::tt_metal::experimental::hal
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 742a53b8976..3910aa679b5 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -1491,44 +1491,6 @@ std::optional<DeviceAddr> Device::lowest_occupied_compute_l1_address(tt::stl::Sp
     }
 }
 
-float Device::sfpu_eps() const {
-    switch (arch()) {
-        case tt::ARCH::GRAYSKULL: return tt::tt_metal::EPS_GS;
-        case tt::ARCH::WORMHOLE_B0: return tt::tt_metal::EPS_WHB0;
-        case tt::ARCH::BLACKHOLE: return tt::tt_metal::EPS_BH;
-        default: return std::numeric_limits<float>::epsilon();
-    }
-
-    return std::numeric_limits<float>::epsilon();
-}
-
-float Device::sfpu_nan() const {
-    switch (arch()) {
-        case tt::ARCH::GRAYSKULL: return tt::tt_metal::NAN_GS;
-        case tt::ARCH::WORMHOLE_B0: return tt::tt_metal::NAN_WHB0;
-        case tt::ARCH::BLACKHOLE: return tt::tt_metal::NAN_BH;
-        default: return std::numeric_limits<float>::quiet_NaN();
-    }
-
-    return std::numeric_limits<float>::quiet_NaN();
-}
-
-// machine inf
-float Device::sfpu_inf() const{
-
-    switch (arch()) {
-        case tt::ARCH::GRAYSKULL:
-            return tt::tt_metal::INF_GS;
-        case tt::ARCH::WORMHOLE_B0:
-            return tt::tt_metal::INF_WHB0;
-        case tt::ARCH::BLACKHOLE:
-            return tt::tt_metal::INF_BH;
-        default:
-            return std::numeric_limits<float>::infinity();
-    }
-    return std::numeric_limits<float>::infinity();
-}
-
 std::pair<int, int> Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const {
     TT_ASSERT(programmable_core < this->build_state_indices_.size(),
         "Programmable core type {} is not included in the FW or Kernel build state", programmable_core);
@@ -2011,12 +1973,6 @@ tt::stl::Span<const std::uint32_t> v1::BankIdsFromLogicalCore(
     return device->bank_ids_from_logical_core(buffer_type, logical_core);
 }
 
-float v1::GetSfpuEps(IDevice* device) { return device->sfpu_eps(); }
-
-float v1::GetSfpuNan(IDevice* device) { return device->sfpu_nan(); }
-
-float v1::GetSfpuInf(IDevice* device) { return device->sfpu_inf(); }
-
 std::size_t v1::GetNumProgramCacheEntries(IDevice* device) { return device->num_program_cache_entries(); }
 
 } // namespace tt_metal
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index 88bfdeb4b91..f2e64a4fadb 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -157,15 +157,6 @@ class Device : public IDevice {
     uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& core) const override;
     uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& cores) const override;
 
-    // machine epsilon
-    float sfpu_eps() const override;
-
-    // machine nan
-    float sfpu_nan() const override;
-
-    // machine inf
-    float sfpu_inf() const override;
-
     const JitBuildEnv& build_env() const override { return this->build_env_; }
     const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const override;
     const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const override;
diff --git a/tt_metal/include/tt_metal/deprecated/device.hpp b/tt_metal/include/tt_metal/deprecated/device.hpp
index 944fb6bb3e3..7054bc1f772 100644
--- a/tt_metal/include/tt_metal/deprecated/device.hpp
+++ b/tt_metal/include/tt_metal/deprecated/device.hpp
@@ -212,30 +212,6 @@ std::int32_t GetBankOffset(IDevice* device, BufferType buffer_type, std::uint32_
 stl::Span<const std::uint32_t> BankIdsFromLogicalCore(
     IDevice* device, BufferType buffer_type, CoreCoord logical_core);
 
-/**
- * @brief Retrieves the machine epsilon for the SFPU on the device.
- *
- * @param device The device to query.
- * @return SFPU machine epsilon.
- */
-float GetSfpuEps(IDevice* device);
-
-/**
- * @brief Retrieves the representation of NaN for the SFPU on the device.
- *
- * @param device The device to query.
- * @return SFPU NaN value.
- */
-float GetSfpuNan(IDevice* device);
-
-/**
- * @brief Retrieves the representation of infinity for the SFPU on the device.
- *
- * @param device The device to query.
- * @return SFPU infinity value.
- */
-float GetSfpuInf(IDevice* device);
-
 /**
  * @brief Retrieves the current worker mode of the device.
  *
diff --git a/tt_metal/include/tt_metal/device.hpp b/tt_metal/include/tt_metal/device.hpp
index 3cb6a375706..eee2e6db894 100644
--- a/tt_metal/include/tt_metal/device.hpp
+++ b/tt_metal/include/tt_metal/device.hpp
@@ -41,19 +41,6 @@ class JitBuildEnv;
 class HWCommandQueue;
 class TraceBuffer;
 
-// TODO: These should be moved into arch specific host files that get exported here
-static constexpr float EPS_GS = 0.001953125f;
-static constexpr float EPS_WHB0 = 1.19209e-7f;
-static constexpr float EPS_BH = EPS_WHB0;
-
-static constexpr float NAN_GS = 6.9752e19;
-static constexpr float NAN_WHB0 = 7.0040e+19;
-static constexpr float NAN_BH = NAN_WHB0;
-
-static constexpr float INF_GS = 1.6948e38;
-static constexpr float INF_WHB0 = 1.7014e+38;
-static constexpr float INF_BH = INF_WHB0;
-
 inline namespace v0 {
 
 class IDevice {
@@ -171,10 +158,6 @@ class IDevice {
     virtual uint32_t get_noc_unicast_encoding(uint8_t noc_index, const CoreCoord& core) const = 0;
     virtual uint32_t get_noc_multicast_encoding(uint8_t noc_index, const CoreRange& cores) const = 0;
 
-    virtual float sfpu_eps() const = 0;
-    virtual float sfpu_nan() const = 0;
-    virtual float sfpu_inf() const = 0;
-
     virtual const JitBuildEnv& build_env() const = 0;
     virtual const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const = 0;
     virtual const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const = 0;
diff --git a/tt_metal/llrt/blackhole/bh_hal.cpp b/tt_metal/llrt/blackhole/bh_hal.cpp
index 5c6513a264a..f78b93be6bb 100644
--- a/tt_metal/llrt/blackhole/bh_hal.cpp
+++ b/tt_metal/llrt/blackhole/bh_hal.cpp
@@ -21,6 +21,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
 constexpr static std::uint32_t DRAM_BARRIER_SIZE =
     ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
 
+static constexpr float EPS_BH = 1.19209e-7f;
+static constexpr float NAN_BH = 7.0040e+19;
+static constexpr float INF_BH = 1.7014e+38;
+
 namespace tt {
 
 namespace tt_metal {
@@ -81,6 +85,10 @@ void Hal::initialize_bh() {
     this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
     this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
     this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;
+
+    this->eps_ = EPS_BH;
+    this->nan_ = NAN_BH;
+    this->inf_ = INF_BH;
 }
 
 } // namespace tt_metal
diff --git a/tt_metal/llrt/grayskull/gs_hal.cpp b/tt_metal/llrt/grayskull/gs_hal.cpp
index 8538b7495ab..0c867682ef0 100644
--- a/tt_metal/llrt/grayskull/gs_hal.cpp
+++ b/tt_metal/llrt/grayskull/gs_hal.cpp
@@ -33,6 +33,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
 constexpr static std::uint32_t DRAM_BARRIER_SIZE =
     ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
 
+static constexpr float EPS_GS = 0.001953125f;
+static constexpr float NAN_GS = 6.9752e19;
+static constexpr float INF_GS = 1.6948e38;
+
 namespace tt {
 
 namespace tt_metal {
@@ -165,6 +169,10 @@ void Hal::initialize_gs() {
     this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
     this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
     this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;
+
+    this->eps_ = EPS_GS;
+    this->nan_ = NAN_GS;
+    this->inf_ = INF_GS;
 }
 
 } // namespace tt_metal
diff --git a/tt_metal/llrt/hal.hpp b/tt_metal/llrt/hal.hpp
index 7810c4c409e..7082a604638 100644
--- a/tt_metal/llrt/hal.hpp
+++ b/tt_metal/llrt/hal.hpp
@@ -154,6 +154,10 @@ class Hal {
     uint32_t virtual_worker_start_x_;
     uint32_t virtual_worker_start_y_;
 
+    float eps_;
+    float nan_;
+    float inf_;
+
     void initialize_gs();
     void initialize_wh();
     void initialize_bh();
@@ -171,6 +175,10 @@ class Hal {
 
     uint32_t get_num_nocs() const { return num_nocs_; }
 
+    float get_eps() const { return eps_; }
+    float get_nan() const { return nan_; }
+    float get_inf() const { return inf_; }
+
     template <typename IndexType, typename SizeType, typename CoordType>
     auto noc_coordinate(IndexType noc_index, SizeType noc_size, CoordType coord) const
         -> decltype(noc_size - 1 - coord) {
diff --git a/tt_metal/llrt/wormhole/wh_hal.cpp b/tt_metal/llrt/wormhole/wh_hal.cpp
index a82bad6c6b3..b514f79939c 100644
--- a/tt_metal/llrt/wormhole/wh_hal.cpp
+++ b/tt_metal/llrt/wormhole/wh_hal.cpp
@@ -21,6 +21,10 @@ constexpr static std::uint32_t DRAM_BARRIER_BASE = 0;
 constexpr static std::uint32_t DRAM_BARRIER_SIZE =
     ((sizeof(uint32_t) + DRAM_ALIGNMENT - 1) / DRAM_ALIGNMENT) * DRAM_ALIGNMENT;
 
+static constexpr float EPS_WHB0 = 1.19209e-7f;
+static constexpr float NAN_WHB0 = 7.0040e+19;
+static constexpr float INF_WHB0 = 1.7014e+38;
+
 namespace tt {
 
 namespace tt_metal {
@@ -82,6 +86,10 @@ void Hal::initialize_wh() {
     this->coordinate_virtualization_enabled_ = COORDINATE_VIRTUALIZATION_ENABLED;
     this->virtual_worker_start_x_ = VIRTUAL_TENSIX_START_X;
     this->virtual_worker_start_y_ = VIRTUAL_TENSIX_START_Y;
+
+    this->eps_ = EPS_WHB0;
+    this->nan_ = NAN_WHB0;
+    this->inf_ = INF_WHB0;
 }
 
 } // namespace tt_metal
diff --git a/ttnn/cpp/pybind11/device.cpp b/ttnn/cpp/pybind11/device.cpp
index 0c8f118dd04..5e4dc24cf27 100644
--- a/ttnn/cpp/pybind11/device.cpp
+++ b/ttnn/cpp/pybind11/device.cpp
@@ -16,7 +16,7 @@
 #include "tt_metal/host_api.hpp"
 #include "tt_metal/impl/trace/trace.hpp"
 #include "ttnn/operations/experimental/auto_format/auto_format.hpp"
-
+#include "tt_metal/experimental/hal.hpp"
 using namespace tt::tt_metal;
 
 namespace py = pybind11;
@@ -132,47 +132,49 @@ void device_module(py::module& m_device) {
         .def(py::self == py::self)
         .def(py::self != py::self);
 
-    auto pyIDevice = static_cast<py::class_<IDevice, std::unique_ptr<IDevice, py::nodelete>>>(m_device.attr("IDevice"))
-        .def("id", &IDevice::id, "Device's ID")
-        .def("arch", &IDevice::arch, "Device's arch")
-        .def(
-            "compute_with_storage_grid_size",
-            &IDevice::compute_with_storage_grid_size,
-            "Grid size (x, y) denoting region that can be targeted by ops")
-        .def("dram_grid_size", &IDevice::dram_grid_size, "Grid size (x, y) denoting dram cores that can be targeted")
-        .def(
-            "worker_core_from_logical_core",
-            &IDevice::worker_core_from_logical_core,
-            "Convert a logical core coordinate into a physical worker core coordinate")
-        .def(
-            "enable_program_cache",
-            &IDevice::enable_program_cache,
-            "Enable caching for all programs sent to this device")
-        .def(
-            "disable_and_clear_program_cache",
-            &IDevice::disable_and_clear_program_cache,
-            "Disable and clear program cache for this device")
-        .def(
-            "num_program_cache_entries",
-            &IDevice::num_program_cache_entries,
-            "Number of entries in the program cache for this device")
-        .def("enable_async", &IDevice::enable_async)
-        .def(
-            "create_sub_device_manager",
-            [](IDevice* device,
-               const std::vector<SubDevice>& sub_devices,
-               DeviceAddr local_l1_size) -> SubDeviceManagerId {
-                SubDeviceManagerId sub_device_manager_id;
-                device->push_work(
-                    [device, sub_devices, local_l1_size, &sub_device_manager_id] {
-                        sub_device_manager_id = device->create_sub_device_manager(sub_devices, local_l1_size);
-                    },
-                    /*blocking=*/true);
-                return sub_device_manager_id;
-            },
-            py::arg("sub_devices"),
-            py::arg("local_l1_size"),
-            R"doc(
+    auto pyIDevice =
+        static_cast<py::class_<IDevice, std::unique_ptr<IDevice, py::nodelete>>>(m_device.attr("IDevice"))
+            .def("id", &IDevice::id, "Device's ID")
+            .def("arch", &IDevice::arch, "Device's arch")
+            .def(
+                "compute_with_storage_grid_size",
+                &IDevice::compute_with_storage_grid_size,
+                "Grid size (x, y) denoting region that can be targeted by ops")
+            .def(
+                "dram_grid_size", &IDevice::dram_grid_size, "Grid size (x, y) denoting dram cores that can be targeted")
+            .def(
+                "worker_core_from_logical_core",
+                &IDevice::worker_core_from_logical_core,
+                "Convert a logical core coordinate into a physical worker core coordinate")
+            .def(
+                "enable_program_cache",
+                &IDevice::enable_program_cache,
+                "Enable caching for all programs sent to this device")
+            .def(
+                "disable_and_clear_program_cache",
+                &IDevice::disable_and_clear_program_cache,
+                "Disable and clear program cache for this device")
+            .def(
+                "num_program_cache_entries",
+                &IDevice::num_program_cache_entries,
+                "Number of entries in the program cache for this device")
+            .def("enable_async", &IDevice::enable_async)
+            .def(
+                "create_sub_device_manager",
+                [](IDevice* device,
+                   const std::vector<SubDevice>& sub_devices,
+                   DeviceAddr local_l1_size) -> SubDeviceManagerId {
+                    SubDeviceManagerId sub_device_manager_id;
+                    device->push_work(
+                        [device, sub_devices, local_l1_size, &sub_device_manager_id] {
+                            sub_device_manager_id = device->create_sub_device_manager(sub_devices, local_l1_size);
+                        },
+                        /*blocking=*/true);
+                    return sub_device_manager_id;
+                },
+                py::arg("sub_devices"),
+                py::arg("local_l1_size"),
+                R"doc(
 Creates a sub-device manager for the given device.
 
 Args:
 
 Returns:
     SubDeviceManagerId: The ID of the created sub-device manager.
 )doc")
-        .def(
-            "create_sub_device_manager_with_fabric",
-            [](IDevice* device, const std::vector<SubDevice>& sub_devices, DeviceAddr local_l1_size) {
-                std::tuple<SubDeviceManagerId, SubDeviceId> manager_and_sub_device_ids;
-                device->push_work(
-                    [device, sub_devices, local_l1_size, &manager_and_sub_device_ids] {
-                        manager_and_sub_device_ids =
-                            device->create_sub_device_manager_with_fabric(sub_devices, local_l1_size);
-                    },
-                    /*blocking=*/true);
-                return manager_and_sub_device_ids;
-            },
-            py::arg("sub_devices"),
-            py::arg("local_l1_size"),
-            R"doc(
+            .def(
+                "create_sub_device_manager_with_fabric",
+                [](IDevice* device, const std::vector<SubDevice>& sub_devices, DeviceAddr local_l1_size) {
+                    std::tuple<SubDeviceManagerId, SubDeviceId> manager_and_sub_device_ids;
+                    device->push_work(
+                        [device, sub_devices, local_l1_size, &manager_and_sub_device_ids] {
+                            manager_and_sub_device_ids =
+                                device->create_sub_device_manager_with_fabric(sub_devices, local_l1_size);
+                        },
+                        /*blocking=*/true);
+                    return manager_and_sub_device_ids;
+                },
+                py::arg("sub_devices"),
+                py::arg("local_l1_size"),
+                R"doc(
 Creates a sub-device manager for the given device. This will automatically create a sub-device of ethernet cores for use with fabric.
 Note that this is a temporary API until migration to actual fabric is complete.
 
 Args:
 
 Returns:
     SubDeviceManagerId: The ID of the created sub-device manager.
     SubDeviceId: The ID of the sub-device that will be used for fabric.
 )doc")
-        .def(
-            "load_sub_device_manager",
-            [](IDevice* device, SubDeviceManagerId sub_device_manager_id) {
-                device->push_work(
-                    [device, sub_device_manager_id] { device->load_sub_device_manager(sub_device_manager_id); });
-            },
-            py::arg("sub_device_manager_id"),
-            R"doc(
+            .def(
+                "load_sub_device_manager",
+                [](IDevice* device, SubDeviceManagerId sub_device_manager_id) {
+                    device->push_work(
+                        [device, sub_device_manager_id] { device->load_sub_device_manager(sub_device_manager_id); });
+                },
+                py::arg("sub_device_manager_id"),
+                R"doc(
 Loads the sub-device manager with the given ID.
 
 Args:
     sub_device_manager_id (SubDeviceManagerId): The ID of the sub-device manager to load.
 )doc")
-        .def(
-            "clear_loaded_sub_device_manager",
-            [](IDevice* device) { device->push_work([device] { device->clear_loaded_sub_device_manager(); }); },
-            R"doc(
+            .def(
+                "clear_loaded_sub_device_manager",
+                [](IDevice* device) { device->push_work([device] { device->clear_loaded_sub_device_manager(); }); },
+                R"doc(
 Clears the loaded sub-device manager for the given device.
 )doc")
-        .def(
-            "remove_sub_device_manager",
-            [](IDevice* device, SubDeviceManagerId sub_device_manager_id) {
-                device->push_work(
-                    [device, sub_device_manager_id] { device->remove_sub_device_manager(sub_device_manager_id); });
-            },
-            py::arg("sub_device_manager_id"),
-            R"doc(
+            .def(
+                "remove_sub_device_manager",
+                [](IDevice* device, SubDeviceManagerId sub_device_manager_id) {
+                    device->push_work(
+                        [device, sub_device_manager_id] { device->remove_sub_device_manager(sub_device_manager_id); });
+                },
+                py::arg("sub_device_manager_id"),
+                R"doc(
 Removes the sub-device manager with the given ID.
 
 Args:
@@ -261,9 +263,18 @@ void device_module(py::module& m_device) {
 Resets the sub_device_ids that will be stalled on by default for Fast Dispatch commands such as reading, writing, synchronizing
 back to all SubDevice IDs.
)doc") - .def("sfpu_eps", &IDevice::sfpu_eps, R"doc(Returns machine epsilon value for current device.)doc") - .def("sfpu_nan", &IDevice::sfpu_nan, R"doc(Returns NaN value for current device.)doc") - .def("sfpu_inf", &IDevice::sfpu_inf, R"doc(Returns Infinity value for current device.)doc"); + .def( + "sfpu_eps", + [](IDevice* device) { return tt::tt_metal::experimental::hal::get_eps(); }, + R"doc(Returns machine epsilon value for current architecture.)doc") + .def( + "sfpu_nan", + [](IDevice* device) { return tt::tt_metal::experimental::hal::get_nan(); }, + R"doc(Returns NaN value for current architecture.)doc") + .def( + "sfpu_inf", + [](IDevice* device) { return tt::tt_metal::experimental::hal::get_inf(); }, + R"doc(Returns Infinity value for current architecture.)doc"); auto pyDevice = static_cast>>(m_device.attr("Device")); pyDevice @@ -276,19 +287,6 @@ void device_module(py::module& m_device) { py::arg("l1_small_size") = DEFAULT_L1_SMALL_SIZE, py::arg("trace_region_size") = DEFAULT_TRACE_REGION_SIZE); - // *** eps constant *** - m_device.attr("EPS_GS") = EPS_GS; - m_device.attr("EPS_WHB0") = EPS_WHB0; - m_device.attr("EPS_BH") = EPS_BH; - - m_device.attr("NAN_GS") = NAN_GS; - m_device.attr("NAN_WHB0") = NAN_WHB0; - m_device.attr("NAN_BH") = NAN_BH; - - m_device.attr("INF_GS") = INF_GS; - m_device.attr("INF_WHB0") = INF_WHB0; - m_device.attr("INF_BH") = INF_BH; - m_device.def( "CreateDevice", [](int device_id, diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp index a441d668eb9..153b3a32130 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp @@ -9,6 +9,7 @@ #include "ttnn/operations/eltwise/unary/unary.hpp" #include "ttnn/types.hpp" #include "tt_metal/common/bfloat16.hpp" +#include "tt_metal/experimental/hal.hpp" #include "ttnn/operations/eltwise/binary/binary_composite.hpp" #include "ttnn/cpp/ttnn/operations/eltwise/ternary/where.hpp" #include "ttnn/cpp/ttnn/operations/copy.hpp" @@ -65,7 +66,7 @@ Tensor _addalpha( // nextafter Tensor _nextafter(const Tensor& input_a, const Tensor& input_b, const std::optional& output_mem_config) { - const float eps = input_a.device()->sfpu_eps(); + const float eps = tt::tt_metal::experimental::hal::get_eps(); Tensor result(input_a); { Tensor eps_gt(input_a); diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp index d5dd9086f66..470b6aa7c71 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary/device/unary_composite_op.cpp @@ -22,6 +22,7 @@ #include "ttnn/run_operation.hpp" #include "ttnn/types.hpp" #include "ttnn/operations/data_movement/bcast/bcast.hpp" +#include "tt_metal/experimental/hal.hpp" namespace ttnn::operations::unary { @@ -66,7 +67,7 @@ Tensor _acosh(const Tensor& input_a, const std::optional& output_m // input > 1, output is acosh(input) Tensor nan_res = ttnn::multiply( ttnn::le(input_a, t_one, std::nullopt, output_mem_config), - input_a.device()->sfpu_nan(), + tt::tt_metal::experimental::hal::get_nan(), std::nullopt, output_mem_config); t_result = ttnn::multiply( @@ -821,13 +822,16 @@ Tensor _logit(const Tensor& input_a, float eps, const std::optionalsfpu_inf(), std::nullopt, output_mem_config); + ttnn::sign(input_a, output_mem_config), + 
tt::tt_metal::experimental::hal::get_inf(), + std::nullopt, + output_mem_config); Tensor logit_result = ttnn::where( ttnn::eq(logit_input, 1.0, std::nullopt, output_mem_config), t_inf, ttnn::where( ttnn::ltz(log_input, output_mem_config), - input_a.device()->sfpu_nan(), + tt::tt_metal::experimental::hal::get_nan(), ttnn::log(log_input, output_mem_config))); return logit_result; } diff --git a/ttnn/ttnn/device.py b/ttnn/ttnn/device.py index 6cbfaa85ead..c657ec2224e 100644 --- a/ttnn/ttnn/device.py +++ b/ttnn/ttnn/device.py @@ -21,9 +21,6 @@ def get_device_core_grid(device): DispatchCoreAxis = ttnn._ttnn.device.DispatchCoreAxis DispatchCoreConfig = ttnn._ttnn.device.DispatchCoreConfig Arch = ttnn._ttnn.device.Arch -EPS_GS = ttnn._ttnn.device.EPS_GS -EPS_WHB0 = ttnn._ttnn.device.EPS_WHB0 -EPS_BH = ttnn._ttnn.device.EPS_BH DEFAULT_L1_SMALL_SIZE = ttnn._ttnn.device.DEFAULT_L1_SMALL_SIZE DEFAULT_TRACE_REGION_SIZE = ttnn._ttnn.device.DEFAULT_TRACE_REGION_SIZE
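
Summary of the migration this patch implies for host code: the per-device virtuals IDevice::sfpu_eps()/sfpu_nan()/sfpu_inf() are removed, and the constants now live on the Hal singleton, populated per architecture by initialize_gs()/initialize_wh()/initialize_bh() and exposed through free functions in tt_metal/experimental/hal.hpp. A minimal call-site sketch, assuming only the headers added above; the surrounding function and the printf are illustrative, not part of the patch:

    // Illustrative only: the intended replacement for the removed
    // IDevice::sfpu_*() virtuals. The values come from the Hal singleton,
    // which was initialized for the detected architecture, so no device
    // handle is needed.
    #include <cstdio>

    #include "tt_metal/experimental/hal.hpp"

    namespace hal = tt::tt_metal::experimental::hal;

    void print_sfpu_constants() {
        float eps = hal::get_eps();  // was: device->sfpu_eps()
        float nan = hal::get_nan();  // was: device->sfpu_nan()
        float inf = hal::get_inf();  // was: device->sfpu_inf()
        std::printf("sfpu eps=%g nan=%g inf=%g\n", eps, nan, inf);
    }

Note for review: the Python-facing device.sfpu_eps()/sfpu_nan()/sfpu_inf() bindings keep their signatures but now ignore the receiver and dispatch to the HAL, while the module-level EPS_GS/EPS_WHB0/EPS_BH attributes are removed outright, which is why the tests asserting on those constants were deleted rather than updated.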