diff --git a/tests/kokkos-based/hermitian_matrix_rank2_update_kokkos.cpp b/tests/kokkos-based/hermitian_matrix_rank2_update_kokkos.cpp new file mode 100644 index 00000000..e43da405 --- /dev/null +++ b/tests/kokkos-based/hermitian_matrix_rank2_update_kokkos.cpp @@ -0,0 +1,331 @@ + /* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include "gtest_fixtures.hpp" +#include "helpers.hpp" + +namespace{ + +//////////////////////////////////////////////////////////// +// Utilities +//////////////////////////////////////////////////////////// + +// create rank-1 mdspan (vector) +template ::mdspan_r1_t> +inline mdspan_t make_mdspan(value_type *data, std::size_t ext) { + return mdspan_t(data, ext); +} + +template ::mdspan_r1_t> +inline mdspan_t make_mdspan(const value_type *data, std::size_t ext) { + return mdspan_t(data, ext); +} + +template +inline auto make_mdspan(std::vector &v) { + return make_mdspan(v.data(), v.size()); +} + +template +inline auto make_mdspan(const std::vector &v) { + return make_mdspan(v.data(), v.size()); +} + +// create rank-2 mdspan (matrix) +template ::mdspan_r2_t> +inline mdspan_t make_mdspan(value_type *data, std::size_t ext0, std::size_t ext1) { + return mdspan_t(data, ext0, ext1); +} + +template +inline bool is_same_vector( + const mdspan, LayoutPolicy1, AccessorPolicy1> &v1, + const mdspan, LayoutPolicy2, AccessorPolicy2> &v2) +{ + const auto size = v1.extent(0); + if (size != v2.extent(0)) + return false; + const auto v1_view = KokkosKernelsSTD::Impl::mdspan_to_view(v1); + const auto v2_view = KokkosKernelsSTD::Impl::mdspan_to_view(v2); + int diff = false; + Kokkos::parallel_reduce(size, + KOKKOS_LAMBDA(const std::size_t i, decltype(diff) &d){ + d = d || !(v1_view(i) == v2_view(i)); + }, diff); + return !diff; +} + +template +inline bool is_same_vector( + const mdspan, LayoutPolicy, AccessorPolicy> &v1, + const std::vector &v2) +{ + return is_same_vector(v1, make_mdspan(v2)); +} + +template +inline bool is_same_vector( + const std::vector &v1, + const mdspan, LayoutPolicy, AccessorPolicy> &v2) +{ + return is_same_vector(v2, v1); +} + +template +inline bool is_same_vector( + const std::vector &v1, + const std::vector &v2) +{ + return is_same_vector(make_mdspan(v1), make_mdspan(v2)); +} + +// real diff: d = |v1 - v2| +template +class value_diff { +public: + value_diff(const T &val1, const T &val2): _v(fabs(val1 - val2)) {} + operator T() const { return _v; } +protected: + value_diff() = default; + T _v; +}; + +// real diff: d = max(|R(v1) - R(v2)|, |I(v1) - I(v2)|) +// Note: returned value is of underlying real type +template +class value_diff>: public value_diff { + using base = value_diff; +public: + value_diff(const std::complex &val1, const std::complex &val2) { + const T dreal = base(val1.real(), val2.real()); + const T dimag = base(val1.imag(), val2.imag()); + base::_v = std::max(dreal, dimag); + } +}; + +template +class value_diff>: public value_diff { + using base = value_diff; +public: + KOKKOS_INLINE_FUNCTION + value_diff(const Kokkos::complex &val1, const Kokkos::complex &val2) { + const T dreal = base(val1.real(), val2.real()); + const T dimag = base(val1.imag(), val2.imag()); + base::_v = dreal > dimag ? dreal : dimag; // can't use std::max on GPU + } +}; + +template +inline bool is_same_matrix( + const mdspan, LayoutPolicy1, AccessorPolicy1> &A, + const mdspan, LayoutPolicy2, AccessorPolicy2> &B, + ToleranceType tolerance) +{ + const auto ext0 = A.extent(0); + const auto ext1 = A.extent(1); + if (B.extent(0) != ext0 or B.extent(1) != ext1) + return false; + const auto A_view = KokkosKernelsSTD::Impl::mdspan_to_view(A); + const auto B_view = KokkosKernelsSTD::Impl::mdspan_to_view(B); + int diff = false; + Kokkos::parallel_reduce(ext0, + KOKKOS_LAMBDA(std::size_t i, decltype(diff) &d) { + for (decltype(i) j = 0; j < ext1; ++j) { + d = d || (value_diff(A_view(i, j), B_view(i, j)) > tolerance); + } + }, diff); + return !diff; +} + +namespace Impl { + +template struct _tolerance_out { using type = T; }; +template struct _tolerance_out> { using type = T; }; + +} + +template +Impl::_tolerance_out::type tolerance(double double_tol, float float_tol); + +template <> double tolerance(double double_tol, float float_tol) { return double_tol; } +template <> float tolerance( double double_tol, float float_tol) { return float_tol; } +template <> double tolerance>(double double_tol, float float_tol) { return double_tol; } +template <> float tolerance>( double double_tol, float float_tol) { return float_tol; } + +template +struct check_types: public std::true_type {}; + +// checks if std::complex and Kokkos::complex are aligned +// (they can get misalligned when Kokkos is build with Kokkos_ENABLE_COMPLEX_ALIGN=ON) +template +struct check_types> { + static constexpr bool value = alignof(std::complex) == alignof(Kokkos::complex); +}; + +template +inline constexpr auto check_types_v = check_types::value; + +template +void run_checked_tests(const std::string_view test_prefix, const std::string_view method_name, + const std::string_view test_postfix, const std::string_view type_spec, + const cb_type cb) { + if constexpr (check_types_v) { + cb(); + } else { + std::cout << "***\n" + << "*** Warning: " << test_prefix << method_name << test_postfix << " skipped for " + << type_spec << " (type check failed)\n" + << "***" << std::endl; + /* avoid dispatcher check failure if all cases are skipped this way */ + KokkosKernelsSTD::Impl::signal_kokkos_impl_called(method_name); + } +} + +#define FOR_ALL_BLAS2_TYPES(TEST_DEF) \ + TEST_DEF(double) \ + TEST_DEF(float) \ + TEST_DEF(complex_double) + + +//////////////////////////////////////////////////////////// +// Tests +//////////////////////////////////////////////////////////// + +template +void hermitian_matrix_rank_2_update_gold_solution(const x_t &x, const y_t &y, A_t &A, Triangle /* t */) +{ + using KokkosKernelsSTD::mtxr2update_impl::conj_if_needed; + using size_type = std::experimental::extents<>::size_type; + constexpr bool low = std::is_same_v; + for (size_type j = 0; j < A.extent(1); ++j) { + const size_type i1 = low ? A.extent(0) : j + 1; + for (size_type i = low ? j : 0; i < i1; ++i) { + A(i,j) += x(i) * conj_if_needed(y(j)) + + y(i) * conj_if_needed(x(j)); + } + } +} + +template +void test_kokkos_matrix_update(const x_t &x, A_t &A, gold_t get_gold, action_t action) +{ + using value_type = typename x_t::value_type; + + // backup x to verify it is not changed after kernel + auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); + + // compute gold + auto A_copy = kokkostesting::create_stdvector_and_copy_rowwise(A); + auto A_gold = make_mdspan(A_copy.data(), A.extent(0), A.extent(1)); + get_gold(A_gold); + + // run tested routine + action(); + + // compare results with gold + EXPECT_TRUE(is_same_matrix(A_gold, A, tolerance(1e-9, 1e-2f))); + + // x should not change after kernel + EXPECT_TRUE(is_same_vector(x, x_preKernel)); +} + +template +void test_kokkos_matrix_update(const x_t &x, const y_t &y, A_t &A, + gold_t get_gold, action_t action) +{ + // backup y to verify it is not changed after kernel + auto y_preKernel = kokkostesting::create_stdvector_and_copy(y); + test_kokkos_matrix_update(x, A, get_gold, action); + EXPECT_TRUE(is_same_vector(y, y_preKernel)); +} + +template +void test_kokkos_hermitian_matrix_rank2_update_impl(const x_t &x, const y_t &y, A_t &A, Triangle t) +{ + const auto get_gold = [&](auto A_gold) { + hermitian_matrix_rank_2_update_gold_solution(x, y, A_gold, t); + }; + const auto compute = [&]() { + std::experimental::linalg::hermitian_matrix_rank_2_update( + KokkosKernelsSTD::kokkos_exec<>(), x, y, A, t); + }; + test_kokkos_matrix_update(x, y, A, get_gold, compute); +} + +} // anonymous namespace + +#define DEFINE_TESTS(blas_val_type) \ +TEST_F(blas2_signed_##blas_val_type##_fixture, \ + kokkos_hermitian_matrix_rank2_update) { \ + using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ + run_checked_tests("kokkos_", "hermitian_matrix_rank2_update", "", \ + #blas_val_type, [&]() { \ + \ + test_kokkos_hermitian_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ + std::experimental::linalg::lower_triangle); \ + test_kokkos_hermitian_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ + std::experimental::linalg::upper_triangle); \ + \ + }); \ +} + +FOR_ALL_BLAS2_TYPES(DEFINE_TESTS); diff --git a/tests/kokkos-based/symmetric_matrix_rank2_update_kokkos.cpp b/tests/kokkos-based/symmetric_matrix_rank2_update_kokkos.cpp new file mode 100644 index 00000000..0b4d33cd --- /dev/null +++ b/tests/kokkos-based/symmetric_matrix_rank2_update_kokkos.cpp @@ -0,0 +1,329 @@ + /* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include "gtest_fixtures.hpp" +#include "helpers.hpp" + +namespace{ + +//////////////////////////////////////////////////////////// +// Utilities +//////////////////////////////////////////////////////////// + +// create rank-1 mdspan (vector) +template ::mdspan_r1_t> +inline mdspan_t make_mdspan(value_type *data, std::size_t ext) { + return mdspan_t(data, ext); +} + +template ::mdspan_r1_t> +inline mdspan_t make_mdspan(const value_type *data, std::size_t ext) { + return mdspan_t(data, ext); +} + +template +inline auto make_mdspan(std::vector &v) { + return make_mdspan(v.data(), v.size()); +} + +template +inline auto make_mdspan(const std::vector &v) { + return make_mdspan(v.data(), v.size()); +} + +// create rank-2 mdspan (matrix) +template ::mdspan_r2_t> +inline mdspan_t make_mdspan(value_type *data, std::size_t ext0, std::size_t ext1) { + return mdspan_t(data, ext0, ext1); +} + +template +inline bool is_same_vector( + const mdspan, LayoutPolicy1, AccessorPolicy1> &v1, + const mdspan, LayoutPolicy2, AccessorPolicy2> &v2) +{ + const auto size = v1.extent(0); + if (size != v2.extent(0)) + return false; + const auto v1_view = KokkosKernelsSTD::Impl::mdspan_to_view(v1); + const auto v2_view = KokkosKernelsSTD::Impl::mdspan_to_view(v2); + int diff = false; + Kokkos::parallel_reduce(size, + KOKKOS_LAMBDA(const std::size_t i, decltype(diff) &d){ + d = d || !(v1_view(i) == v2_view(i)); + }, diff); + return !diff; +} + +template +inline bool is_same_vector( + const mdspan, LayoutPolicy, AccessorPolicy> &v1, + const std::vector &v2) +{ + return is_same_vector(v1, make_mdspan(v2)); +} + +template +inline bool is_same_vector( + const std::vector &v1, + const mdspan, LayoutPolicy, AccessorPolicy> &v2) +{ + return is_same_vector(v2, v1); +} + +template +inline bool is_same_vector( + const std::vector &v1, + const std::vector &v2) +{ + return is_same_vector(make_mdspan(v1), make_mdspan(v2)); +} + +// real diff: d = |v1 - v2| +template +class value_diff { +public: + value_diff(const T &val1, const T &val2): _v(fabs(val1 - val2)) {} + operator T() const { return _v; } +protected: + value_diff() = default; + T _v; +}; + +// real diff: d = max(|R(v1) - R(v2)|, |I(v1) - I(v2)|) +// Note: returned value is of underlying real type +template +class value_diff>: public value_diff { + using base = value_diff; +public: + value_diff(const std::complex &val1, const std::complex &val2) { + const T dreal = base(val1.real(), val2.real()); + const T dimag = base(val1.imag(), val2.imag()); + base::_v = std::max(dreal, dimag); + } +}; + +template +class value_diff>: public value_diff { + using base = value_diff; +public: + KOKKOS_INLINE_FUNCTION + value_diff(const Kokkos::complex &val1, const Kokkos::complex &val2) { + const T dreal = base(val1.real(), val2.real()); + const T dimag = base(val1.imag(), val2.imag()); + base::_v = dreal > dimag ? dreal : dimag; // can't use std::max on GPU + } +}; + +template +inline bool is_same_matrix( + const mdspan, LayoutPolicy1, AccessorPolicy1> &A, + const mdspan, LayoutPolicy2, AccessorPolicy2> &B, + ToleranceType tolerance) +{ + const auto ext0 = A.extent(0); + const auto ext1 = A.extent(1); + if (B.extent(0) != ext0 or B.extent(1) != ext1) + return false; + const auto A_view = KokkosKernelsSTD::Impl::mdspan_to_view(A); + const auto B_view = KokkosKernelsSTD::Impl::mdspan_to_view(B); + int diff = false; + Kokkos::parallel_reduce(ext0, + KOKKOS_LAMBDA(std::size_t i, decltype(diff) &d) { + for (decltype(i) j = 0; j < ext1; ++j) { + d = d || (value_diff(A_view(i, j), B_view(i, j)) > tolerance); + } + }, diff); + return !diff; +} + +namespace Impl { + +template struct _tolerance_out { using type = T; }; +template struct _tolerance_out> { using type = T; }; + +} + +template +Impl::_tolerance_out::type tolerance(double double_tol, float float_tol); + +template <> double tolerance(double double_tol, float float_tol) { return double_tol; } +template <> float tolerance( double double_tol, float float_tol) { return float_tol; } +template <> double tolerance>(double double_tol, float float_tol) { return double_tol; } +template <> float tolerance>( double double_tol, float float_tol) { return float_tol; } + +template +struct check_types: public std::true_type {}; + +// checks if std::complex and Kokkos::complex are aligned +// (they can get misalligned when Kokkos is build with Kokkos_ENABLE_COMPLEX_ALIGN=ON) +template +struct check_types> { + static constexpr bool value = alignof(std::complex) == alignof(Kokkos::complex); +}; + +template +inline constexpr auto check_types_v = check_types::value; + +template +void run_checked_tests(const std::string_view test_prefix, const std::string_view method_name, + const std::string_view test_postfix, const std::string_view type_spec, + const cb_type cb) { + if constexpr (check_types_v) { + cb(); + } else { + std::cout << "***\n" + << "*** Warning: " << test_prefix << method_name << test_postfix << " skipped for " + << type_spec << " (type check failed)\n" + << "***" << std::endl; + /* avoid dispatcher check failure if all cases are skipped this way */ + KokkosKernelsSTD::Impl::signal_kokkos_impl_called(method_name); + } +} + +#define FOR_ALL_BLAS2_TYPES(TEST_DEF) \ + TEST_DEF(double) \ + TEST_DEF(float) \ + TEST_DEF(complex_double) + + +//////////////////////////////////////////////////////////// +// Tests +//////////////////////////////////////////////////////////// + +template +void symmetric_matrix_rank_2_update_gold_solution(const x_t &x, const y_t &y, A_t &A, Triangle /* t */) +{ + using size_type = std::experimental::extents<>::size_type; + constexpr bool low = std::is_same_v; + for (size_type j = 0; j < A.extent(1); ++j) { + const size_type i1 = low ? A.extent(0) : j + 1; + for (size_type i = low ? j : 0; i < i1; ++i) { + A(i,j) += x(i) * y(j) + y(i) * x(j); + } + } +} + +template +void test_kokkos_matrix_update(const x_t &x, A_t &A, gold_t get_gold, action_t action) +{ + using value_type = typename x_t::value_type; + + // backup x to verify it is not changed after kernel + auto x_preKernel = kokkostesting::create_stdvector_and_copy(x); + + // compute gold + auto A_copy = kokkostesting::create_stdvector_and_copy_rowwise(A); + auto A_gold = make_mdspan(A_copy.data(), A.extent(0), A.extent(1)); + get_gold(A_gold); + + // run tested routine + action(); + + // compare results with gold + EXPECT_TRUE(is_same_matrix(A_gold, A, tolerance(1e-9, 1e-2f))); + + // x should not change after kernel + EXPECT_TRUE(is_same_vector(x, x_preKernel)); +} + +template +void test_kokkos_matrix_update(const x_t &x, const y_t &y, A_t &A, + gold_t get_gold, action_t action) +{ + // backup y to verify it is not changed after kernel + auto y_preKernel = kokkostesting::create_stdvector_and_copy(y); + test_kokkos_matrix_update(x, A, get_gold, action); + EXPECT_TRUE(is_same_vector(y, y_preKernel)); +} + +template +void test_kokkos_symmetric_matrix_rank2_update_impl(const x_t &x, const y_t &y, A_t &A, Triangle t) +{ + const auto get_gold = [&](auto A_gold) { + symmetric_matrix_rank_2_update_gold_solution(x, y, A_gold, t); + }; + const auto compute = [&]() { + std::experimental::linalg::symmetric_matrix_rank_2_update( + KokkosKernelsSTD::kokkos_exec<>(), x, y, A, t); + }; + test_kokkos_matrix_update(x, y, A, get_gold, compute); +} + +} // anonymous namespace + +#define DEFINE_TESTS(blas_val_type) \ +TEST_F(blas2_signed_##blas_val_type##_fixture, \ + kokkos_symmetric_matrix_rank2_update) { \ + using val_t = typename blas2_signed_##blas_val_type##_fixture::value_type; \ + run_checked_tests("kokkos_", "symmetric_matrix_rank2_update", "", \ + #blas_val_type, [&]() { \ + \ + test_kokkos_symmetric_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ + std::experimental::linalg::lower_triangle); \ + test_kokkos_symmetric_matrix_rank2_update_impl(x_e0, y_e0, A_sym_e0, \ + std::experimental::linalg::upper_triangle); \ + \ + }); \ +} + +FOR_ALL_BLAS2_TYPES(DEFINE_TESTS); diff --git a/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas2_matrix_rank_2_update.hpp b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas2_matrix_rank_2_update.hpp new file mode 100644 index 00000000..cc3cf8ab --- /dev/null +++ b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas2_matrix_rank_2_update.hpp @@ -0,0 +1,305 @@ + /* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_BLAS2_MATRIX_RANK_2_UPDATE_HPP_ +#define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_BLAS2_MATRIX_RANK_2_UPDATE_HPP_ + +#include +#include "signal_kokkos_impl_called.hpp" + +namespace KokkosKernelsSTD { + +namespace mtxr2update_impl { + +// manages parallel execution of independent action +// called like action(i, j) for each matrix element A(i, j) +template +class ParallelMatrixVisitor { +public: + KOKKOS_INLINE_FUNCTION ParallelMatrixVisitor(ExecSpace &&exec_in, MatrixType A_in): + exec(exec_in), A(A_in), ext0(A.extent(0)), ext1(A.extent(1)) + {} + + template + KOKKOS_INLINE_FUNCTION + void for_each_matrix_element(ActionType action) { + if (ext0 > ext1) { // parallel rows + Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext0), + KOKKOS_LAMBDA(const auto i) { + using idx_type = std::remove_const_t; + for (idx_type j = 0; j < ext1; ++j) { + action(i, j); + } + }); + } else { // parallel columns + Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext1), + KOKKOS_LAMBDA(const auto j) { + using idx_type = std::remove_const_t; + for (idx_type i = 0; i < ext0; ++i) { + action(i, j); + } + }); + } + exec.fence(); + } + + template + void for_each_triangle_matrix_element(std::experimental::linalg::upper_triangle_t t, ActionType action) { + Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, ext1), + KOKKOS_LAMBDA(const auto j) { + using idx_type = std::remove_const_t; + for (idx_type i = 0; i <= j; ++i) { + action(i, j); + } + }); + exec.fence(); + } + + template + void for_each_triangle_matrix_element(std::experimental::linalg::lower_triangle_t t, ActionType action) { + for_each_triangle_matrix_element(std::experimental::linalg::upper_triangle, + [action](const auto i, const auto j) { + action(j, i); + }); + } + +private: + ExecSpace exec; + MatrixType A; + size_t ext0; + size_t ext1; +}; + +// Note: phrase it simply and the same as in specification ("has unique layout") +template ::size_type numRows, + std::experimental::extents<>::size_type numCols> + +inline constexpr bool is_unique_layout_v = Layout::template mapping< + std::experimental::extents >::is_always_unique(); + +template +struct is_layout_blas_packed: public std::false_type {}; + +template +struct is_layout_blas_packed< + std::experimental::linalg::layout_blas_packed>: + public std::true_type {}; + +template +inline constexpr bool is_layout_blas_packed_v = is_layout_blas_packed::value; + +// Note: will only signal failure for layout_blas_packed with different triangle +template +struct triangle_layout_match: public std::true_type {}; + +template +struct triangle_layout_match< + std::experimental::linalg::layout_blas_packed, + Triangle2> +{ + static constexpr bool value = std::is_same_v; +}; + +template +inline constexpr bool triangle_layout_match_v = triangle_layout_match::value; + +// This version of conj_if_needed() also handles Kokkos::complex +template +KOKKOS_INLINE_FUNCTION +T conj_if_needed(const T &value) +{ + return value; +} + +template +KOKKOS_INLINE_FUNCTION +auto conj_if_needed(const Kokkos::complex &value) +{ + return Kokkos::conj(value); +} + +template +KOKKOS_INLINE_FUNCTION +auto conj_if_needed(const std::complex &value) +{ + return std::conj(value); +} + +template +KOKKOS_INLINE_FUNCTION +constexpr bool static_extent_match(size_type extent1, size_type extent2) +{ + return extent1 == std::experimental::dynamic_extent || + extent2 == std::experimental::dynamic_extent || + extent1 == extent2; +} + +} // namespace mtxr2update_impl + +// Rank-2 update of a symmetric matrix +// performs BLAS xSYR2/xSPR2: A[i,j] += x[i] * y[j] + y[i] * x[j] + +template::size_type ext_x, + class Layout_x, + class ElementType_y, + std::experimental::extents<>::size_type ext_y, + class Layout_y, + class ElementType_A, + std::experimental::extents<>::size_type numRows_A, + std::experimental::extents<>::size_type numCols_A, + class Layout_A, + class Triangle> + requires (mtxr2update_impl::is_unique_layout_v + or mtxr2update_impl::is_layout_blas_packed_v) +void symmetric_matrix_rank_2_update(kokkos_exec &&exec, + std::experimental::mdspan, Layout_x, + std::experimental::default_accessor> x, + std::experimental::mdspan, Layout_y, + std::experimental::default_accessor> y, + std::experimental::mdspan, Layout_A, + std::experimental::default_accessor> A, + Triangle t) +{ + // P1673 constraints (redundant to mdspan extents in the header) + static_assert(A.rank() == 2); + static_assert(x.rank() == 1); + static_assert(y.rank() == 1); + static_assert(mtxr2update_impl::triangle_layout_match_v); + + // P1673 mandates + static_assert(mtxr2update_impl::static_extent_match(A.static_extent(0), A.static_extent(1))); + static_assert(mtxr2update_impl::static_extent_match(A.static_extent(0), x.static_extent(0))); + static_assert(mtxr2update_impl::static_extent_match(A.static_extent(0), y.static_extent(0))); + + // P1673 preconditions + if ( A.extent(0) != A.extent(1) ){ + throw std::runtime_error("KokkosBlas: symmetric_matrix_rank_1_update: A.extent(0) != A.extent(1)"); + } + if ( A.extent(0) != x.extent(0) ){ + throw std::runtime_error("KokkosBlas: symmetric_matrix_rank_1_update: A.extent(0) != x.extent(0)"); + } + if ( A.extent(0) != y.extent(0) ){ + throw std::runtime_error("KokkosBlas: symmetric_matrix_rank_1_update: A.extent(0) != y.extent(0)"); + } + + Impl::signal_kokkos_impl_called("symmetric_matrix_rank2_update"); + + // convert mdspans to views + const auto x_view = Impl::mdspan_to_view(x); + const auto y_view = Impl::mdspan_to_view(y); + auto A_view = Impl::mdspan_to_view(A); + mtxr2update_impl::ParallelMatrixVisitor v(ExecSpace(), A_view); + v.for_each_triangle_matrix_element(t, + KOKKOS_LAMBDA(const auto i, const auto j) { + A_view(i, j) += x_view(i) * y_view(j) + y_view(i) * x_view(j); + }); +} + +// Rank-2 update of a Hermitian matrix +// performs BLAS xHER2/xHPR2: x[i] * conj(y[j]) + y[i] * conj(x[j]) + +template::size_type ext_x, + class Layout_x, + class ElementType_y, + std::experimental::extents<>::size_type ext_y, + class Layout_y, + class ElementType_A, + std::experimental::extents<>::size_type numRows_A, + std::experimental::extents<>::size_type numCols_A, + class Layout_A, + class Triangle> + requires (mtxr2update_impl::is_unique_layout_v + or mtxr2update_impl::is_layout_blas_packed_v) +void hermitian_matrix_rank_2_update(kokkos_exec &&exec, + std::experimental::mdspan, Layout_x, + std::experimental::default_accessor> x, + std::experimental::mdspan, Layout_y, + std::experimental::default_accessor> y, + std::experimental::mdspan, Layout_A, + std::experimental::default_accessor> A, + Triangle t) +{ + // P1673 constraints (redundant to mdspan extents in the header) + static_assert(A.rank() == 2); + static_assert(x.rank() == 1); + static_assert(y.rank() == 1); + static_assert(mtxr2update_impl::triangle_layout_match_v); + + // P1673 mandates + static_assert(mtxr2update_impl::static_extent_match(A.static_extent(0), A.static_extent(1))); + static_assert(mtxr2update_impl::static_extent_match(A.static_extent(0), x.static_extent(0))); + static_assert(mtxr2update_impl::static_extent_match(A.static_extent(0), y.static_extent(0))); + + // P1673 preconditions + if ( A.extent(0) != A.extent(1) ){ + throw std::runtime_error("KokkosBlas: hermitian_matrix_rank_1_update: A.extent(0) != A.extent(1)"); + } + if ( A.extent(0) != x.extent(0) ){ + throw std::runtime_error("KokkosBlas: hermitian_matrix_rank_1_update: A.extent(0) != x.extent(0)"); + } + if ( A.extent(0) != y.extent(0) ){ + throw std::runtime_error("KokkosBlas: hermitian_matrix_rank_1_update: A.extent(0) != y.extent(0)"); + } + + Impl::signal_kokkos_impl_called("hermitian_matrix_rank2_update"); + + // convert mdspans to views + const auto x_view = Impl::mdspan_to_view(x); + const auto y_view = Impl::mdspan_to_view(y); + auto A_view = Impl::mdspan_to_view(A); + mtxr2update_impl::ParallelMatrixVisitor v(ExecSpace(), A_view); + v.for_each_triangle_matrix_element(t, + KOKKOS_LAMBDA(const auto i, const auto j) { + const auto yjc = mtxr2update_impl::conj_if_needed(y_view(j)); + const auto xjc = mtxr2update_impl::conj_if_needed(x_view(j)); + A_view(i, j) += x_view(i) * yjc + y_view(i) * xjc; + }); +} + +} // namespace KokkosKernelsSTD +#endif diff --git a/tpl-implementations/include/experimental/linalg_kokkoskernels b/tpl-implementations/include/experimental/linalg_kokkoskernels index 4d6136bf..9ff9315d 100644 --- a/tpl-implementations/include/experimental/linalg_kokkoskernels +++ b/tpl-implementations/include/experimental/linalg_kokkoskernels @@ -10,6 +10,7 @@ #include "__p1673_bits/kokkos-kernels/blas1_vector_abs_sum_kk.hpp" #include "__p1673_bits/kokkos-kernels/blas1_vector_sum_of_squares_kk.hpp" #include "__p1673_bits/kokkos-kernels/blas2_matrix_rank_1_update.hpp" +#include "__p1673_bits/kokkos-kernels/blas2_matrix_rank_2_update.hpp" #include "__p1673_bits/kokkos-kernels/blas1_swap_elements_kk.hpp" #include "__p1673_bits/kokkos-kernels/blas2_gemv_kk.hpp" #include "__p1673_bits/kokkos-kernels/blas2_symv_kk.hpp" \ No newline at end of file