diff --git a/tests/ttnn/unit_tests/operations/ccl/test_reduce_scatter_async.py b/tests/ttnn/unit_tests/operations/ccl/test_reduce_scatter_async.py
index ce8e060cd7f..af848c87710 100644
--- a/tests/ttnn/unit_tests/operations/ccl/test_reduce_scatter_async.py
+++ b/tests/ttnn/unit_tests/operations/ccl/test_reduce_scatter_async.py
@@ -185,7 +185,7 @@ def run_reduce_scatter_test(
     else:
         logger.info(f"Running {num_iters} iterations of reduce scatter")
         for i in range(num_iters):
-            output_tensor_mesh = ttnn.reduce_scatter_async(
+            output_tensor_mesh = ttnn.experimental.reduce_scatter_async(
                 input_tensor_mesh,
                 dim=dim,
                 math_op=math_op,
@@ -330,9 +330,6 @@ def test_line_reduce_scatter_async_post_commit(
     )
 
 
-@pytest.mark.skip(
-    "persistent fabric test with cluster-axis API and multiple concurrent reduce_scatter instances not enabled yet"
-)
 @skip_for_grayskull("Requires eth connected devices to run")
 @pytest.mark.parametrize(
     "num_devices, num_links, per_chip_output_shape, dim, layout",
@@ -399,9 +396,6 @@ def test_line_reduce_scatter_async_on_T3K_cols_post_commit(
     )
 
 
-@pytest.mark.skip(
-    "persistent fabric test with cluster-axis API and multiple concurrent reduce_scatter instances not enabled yet"
-)
 @skip_for_grayskull("Requires eth connected devices to run")
 @pytest.mark.parametrize(
     "num_devices, num_links, per_chip_output_shape, dim, layout",
diff --git a/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter.hpp b/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter.hpp
index 5a46906c747..b2288c93649 100644
--- a/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter.hpp
+++ b/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter.hpp
@@ -44,7 +44,11 @@ struct ExecuteReduceScatter {
 }  // namespace experimental
 }  // namespace operations
 
-constexpr auto reduce_scatter_async =
-    ttnn::register_operation<"ttnn::reduce_scatter_async", ttnn::operations::experimental::ccl::ExecuteReduceScatter>();
+namespace experimental {
+
+constexpr auto reduce_scatter_async = ttnn::register_operation<
+    "ttnn::experimental::reduce_scatter_async",
+    ttnn::operations::experimental::ccl::ExecuteReduceScatter>();
+}  // namespace experimental
 
 }  // namespace ttnn
diff --git a/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter_pybind.cpp b/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter_pybind.cpp
index 8b379a0236c..867ebb9ef6c 100644
--- a/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter_pybind.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/ccl/reduce_scatter_async/reduce_scatter_pybind.cpp
@@ -95,7 +95,7 @@ void bind_reduce_scatter(pybind11::module& module, const ccl_operation_t& operat
 void py_bind_reduce_scatter_async(pybind11::module& module) {
     detail::bind_reduce_scatter(
         module,
-        ttnn::reduce_scatter_async,
+        ttnn::experimental::reduce_scatter_async,
         R"doc(
         Performs an reduce_scatter operation on multi-device :attr:`input_tensor` across all devices. This operation requires a persistent
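
For downstream callers, the only visible change is the namespace: the op is now registered as "ttnn::experimental::reduce_scatter_async" in C++ and therefore exposed as ttnn.experimental.reduce_scatter_async in Python. Below is a minimal migration sketch; the argument names (input_tensor_mesh, dim, math_op) are taken from the test diff above, while the surrounding mesh/tensor setup is assumed to exist and is out of scope here.

# Migration sketch for call sites affected by this rename (assumes an
# already-constructed multi-device tensor input_tensor_mesh and reduction
# settings dim / math_op, as in the test file touched by this diff).

# Before: the op was registered at the top level of the ttnn namespace.
# output_tensor_mesh = ttnn.reduce_scatter_async(
#     input_tensor_mesh, dim=dim, math_op=math_op)

# After: the op lives under ttnn.experimental, matching its C++
# registration as "ttnn::experimental::reduce_scatter_async".
output_tensor_mesh = ttnn.experimental.reduce_scatter_async(
    input_tensor_mesh,
    dim=dim,
    math_op=math_op,
)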