diff --git a/CMakeLists.txt b/CMakeLists.txt
index e85ed1f485..2cd393a6ec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
 # Set version number
 set(RAJA_VERSION_MAJOR 0)
 set(RAJA_VERSION_MINOR 12)
-set(RAJA_VERSION_PATCHLEVEL 0)
+set(RAJA_VERSION_PATCHLEVEL 1)
 
 if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
   message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 4ffff68c02..38e3b5fb4a 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -10,6 +10,14 @@
 Version vxx.yy.zz -- Release date 20yy-mm-dd
 ============================================
 
+Version v0.12.1 -- Release date 2020-09-09
+============================================
+
+This release fixes errors that occur when RAJA is configured with CUDA
+enabled but built with a non-CUDA compiler, fixes compiler warnings, and
+contains several other bug fixes related to OpenMP target compilation.
+
+
 Version v0.12.0 -- Release date 2020-09-03
 ============================================
 
diff --git a/docs/sphinx/user_guide/conf.py b/docs/sphinx/user_guide/conf.py
index 25b3bb2d91..dde33c05d7 100644
--- a/docs/sphinx/user_guide/conf.py
+++ b/docs/sphinx/user_guide/conf.py
@@ -68,7 +68,7 @@
 # The short X.Y version.
 version = u'0.12'
 # The full version, including alpha/beta/rc tags.
-release = u'0.12.0'
+release = u'0.12.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index b488e88050..72a12daef2 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -102,13 +102,13 @@ raja_add_executable(
   SOURCES multiview.cpp)
 
 if(ENABLE_TARGET_OPENMP)
-  raja_add_executable(
-    NAME target-kernel
-    SOURCES omp-target-kernel.cpp)
-
-  raja_add_executable(
-    NAME omp-target-ltimes
-    SOURCES omp-target-ltimes.cpp)
+  # raja_add_executable(
+  #   NAME target-kernel
+  #   SOURCES omp-target-kernel.cpp)
+  #
+  # raja_add_executable(
+  #   NAME omp-target-ltimes
+  #   SOURCES omp-target-ltimes.cpp)
 endif()
 
 raja_add_executable(
diff --git a/examples/raja-teams.cpp b/examples/raja-teams.cpp
index 870b176bed..df5963d267 100644
--- a/examples/raja-teams.cpp
+++ b/examples/raja-teams.cpp
@@ -102,7 +102,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
   std::cout << "\n Running RAJA-Teams examples...\n";
 
   int num_of_backends = 1;
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
   num_of_backends++;
 #endif
 
@@ -119,13 +119,15 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
 
     // Allocate memory for either host or device
     int N_tri = 5;
-    int *Ddat;
-    if (select_cpu_or_gpu == RAJA::expt::HOST)
+    int* Ddat = nullptr;
+    if (select_cpu_or_gpu == RAJA::expt::HOST) {
       Ddat = host_res.allocate<int>(N_tri * N_tri);
+    }
 
-#if defined(RAJA_ENABLE_DEVICE)
-    if (select_cpu_or_gpu == RAJA::expt::DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
+    if (select_cpu_or_gpu == RAJA::expt::DEVICE) {
       Ddat = device_res.allocate<int>(N_tri * N_tri);
+    }
 #endif
 
     /*
@@ -143,9 +145,9 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
      */
 
     if (select_cpu_or_gpu == RAJA::expt::HOST){
-      std::cout << "\n Running Upper triangular pattern example on the host...\n";
+      std::cout << "\n Running upper triangular pattern example on the host...\n";
     }else {
-      std::cout << "\n Running Upper triangular pattern example on the device...\n";
+      std::cout << "\n Running upper triangular pattern example on the device...\n";
     }
 
@@ -157,21 +159,20 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
       RAJA::expt::loop<teams_x>(ctx, RAJA::RangeSegment(0, N_tri), [&](int r) {
 
-        // Array shared within threads of the same team
-        TEAM_SHARED int s_A[1];
-
-        RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
-          if (c == r) s_A[0] = r;
-          D(r, c) = r * N_tri + c;
-        });  // loop j
+            // Array shared within threads of the same team
+            RAJA_TEAM_SHARED int s_A[1];
 
-        ctx.teamSync();
+            RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(0, 1), [&](int c) {
+              s_A[c] = r;
+            });  // loop c
 
-        RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
+            ctx.teamSync();
 
-          printf("r=%d, c=%d : D=%d : s_A = %d \n", r, c, D(r, c), s_A[0]);
+            RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
+              D(r, c) = r * N_tri + c;
+              printf("r=%d, c=%d : D=%d : s_A = %d \n", r, c, D(r, c), s_A[0]);
+            });  // loop c
 
-        });  // loop c
       });  // loop r
   });  // outer lambda
@@ -179,7 +180,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
     host_res.deallocate(Ddat);
   }
 
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
   if (select_cpu_or_gpu == RAJA::expt::DEVICE) {
     device_res.deallocate(Ddat);
   }
diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 91dc8c56a8..ac5648f425 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -155,12 +155,14 @@ namespace RAJA {
 #endif // _OPENMP
 #endif // RAJA_ENABLE_OPENMP
 
-#if defined(RAJA_ENABLE_CUDA)
-#if not defined(__CUDACC__)
-#error RAJA configured with ENABLE_CUDA, but CUDA not supported by current compiler
-#endif //
-#endif // RAJA_ENABLE_CUDA
+#if defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__)
+#define RAJA_CUDA_ACTIVE
+#endif // RAJA_ENABLE_CUDA && __CUDACC__
 
+#if defined(RAJA_CUDA_ACTIVE) || \
+    defined(RAJA_ENABLE_HIP)
+#define RAJA_DEVICE_ACTIVE
+#endif
 
 /*!
 ******************************************************************************
diff --git a/include/RAJA/index/ListSegment.hpp b/include/RAJA/index/ListSegment.hpp
index 0f5ad36e2e..cca8f2eda2 100644
--- a/include/RAJA/index/ListSegment.hpp
+++ b/include/RAJA/index/ListSegment.hpp
@@ -31,7 +31,7 @@
 #include "RAJA/util/Span.hpp"
 #include "RAJA/util/types.hpp"
 
-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
 #include "RAJA/policy/cuda/raja_cudaerrchk.hpp"
 #else
 #define cudaErrchk(...)
@@ -70,7 +70,7 @@ class TypedListSegment
  * won't see any different usage or behavior.
  */
 
-#if ((defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)) || defined(RAJA_ENABLE_HIP)
+#if defined(RAJA_DEVICE_ACTIVE)
   static constexpr bool Has_GPU = true;
 #else
   static constexpr bool Has_GPU = false;
@@ -117,7 +117,7 @@ class TypedListSegment
   //! specialization for allocation of CPU_memory
   void allocate(CPU_memory) { m_data = new T[m_size]; }
 
-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
   //! copy data from container using BlockCopy
   template <typename Container>
   void copy(Container&& src, BlockCopy)
diff --git a/include/RAJA/pattern/teams.hpp b/include/RAJA/pattern/teams.hpp
index 9c28c74389..c8d36732c1 100644
--- a/include/RAJA/pattern/teams.hpp
+++ b/include/RAJA/pattern/teams.hpp
@@ -25,7 +25,7 @@
 //
 #include "RAJA/pattern/teams/teams_sequential.hpp"
 
-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
 #include "RAJA/pattern/teams/teams_cuda.hpp"
 #endif
diff --git a/include/RAJA/pattern/teams/teams_core.hpp b/include/RAJA/pattern/teams/teams_core.hpp
index d40418e228..c313dbeeb7 100644
--- a/include/RAJA/pattern/teams/teams_core.hpp
+++ b/include/RAJA/pattern/teams/teams_core.hpp
@@ -28,16 +28,10 @@
 #include "camp/concepts.hpp"
 #include "camp/tuple.hpp"
 
-#if ((defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && \
-     defined(RAJA_ENABLE_CUDA)) || \
-    defined(RAJA_ENABLE_HIP)
-#define RAJA_ENABLE_DEVICE
-#endif
-
 #if defined(RAJA_DEVICE_CODE)
-#define TEAM_SHARED __shared__
+#define RAJA_TEAM_SHARED __shared__
 #else
-#define TEAM_SHARED
+#define RAJA_TEAM_SHARED
 #endif
 
 namespace RAJA
@@ -54,27 +48,27 @@ struct null_launch_t {
 
 // Support for host, and device
 template <typename HOST_POLICY
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
           ,
           typename DEVICE_POLICY
 #endif
           >
 struct LoopPolicy {
   using host_policy_t = HOST_POLICY;
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
   using device_policy_t = DEVICE_POLICY;
 #endif
 };
 
 template <typename HOST_POLICY
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
           ,
           typename DEVICE_POLICY
 #endif
           >
 struct LaunchPolicy {
   using host_policy_t = HOST_POLICY;
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
   using device_policy_t = DEVICE_POLICY;
 #endif
 };
@@ -193,7 +187,7 @@ void launch(ExecPlace place, Resources const &team_resources, BODY const &body)
     launch_t::exec(LaunchContext(team_resources, HOST), body);
     break;
   }
-#ifdef RAJA_ENABLE_DEVICE
+#ifdef RAJA_DEVICE_ACTIVE
   case DEVICE: {
     using launch_t = LaunchExecute<typename POLICY_LIST::device_policy_t>;
     launch_t::exec(LaunchContext(team_resources, DEVICE), body);
diff --git a/include/RAJA/pattern/teams/teams_sequential.hpp b/include/RAJA/pattern/teams/teams_sequential.hpp
index d2cfa267d9..7b2cf46b86 100644
--- a/include/RAJA/pattern/teams/teams_sequential.hpp
+++ b/include/RAJA/pattern/teams/teams_sequential.hpp
@@ -35,7 +35,8 @@ struct seq_launch_t {
 template <>
 struct LaunchExecute<RAJA::expt::null_launch_t> {
   template <typename BODY>
-  static void exec(LaunchContext const &ctx, BODY const &body)
+  static void exec(LaunchContext const& RAJA_UNUSED_ARG(ctx),
+                   BODY const& RAJA_UNUSED_ARG(body))
   {
     RAJA_ABORT_OR_THROW("NULL Launch");
   }
diff --git a/include/RAJA/policy/cuda.hpp b/include/RAJA/policy/cuda.hpp
index 370b1462e6..c99001367f 100644
--- a/include/RAJA/policy/cuda.hpp
+++ b/include/RAJA/policy/cuda.hpp
@@ -22,7 +22,7 @@
 
 #include "RAJA/config.hpp"
 
-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
 
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/include/RAJA/policy/cuda/policy.hpp b/include/RAJA/policy/cuda/policy.hpp
index 3433fc3fb6..45ac5def7e 100644
--- a/include/RAJA/policy/cuda/policy.hpp
+++ b/include/RAJA/policy/cuda/policy.hpp
@@ -20,7 +20,7 @@
 
 #include "RAJA/config.hpp"
 
-#if defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
 
 #include <cuda.h>
diff --git a/include/RAJA/policy/openmp/policy.hpp b/include/RAJA/policy/openmp/policy.hpp
index d1e7cda363..754f2e437c 100644
--- a/include/RAJA/policy/openmp/policy.hpp
+++ b/include/RAJA/policy/openmp/policy.hpp
@@ -66,9 +66,6 @@ namespace internal
 struct Parallel {
 };
 
-struct Collapse {
-};
-
 struct For {
 };
diff --git a/include/RAJA/policy/openmp_target/forall.hpp b/include/RAJA/policy/openmp_target/forall.hpp
index 1c88cb164f..76f1b772a6 100644
--- a/include/RAJA/policy/openmp_target/forall.hpp
+++ b/include/RAJA/policy/openmp_target/forall.hpp
@@ -68,11 +68,11 @@ RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Omp &om
     ib(begin_it[i]);
   }
 
-  return resources::EventProxy<resources::Omp>(&res);
+  return resources::EventProxy<resources::Omp>(&omp_res);
 }
 
 template <typename Iterable, typename Func>
-RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Resource &omp_res,
+RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Omp &omp_res,
                                                               const omp_target_parallel_for_exec_nt&,
                                                               Iterable&& iter,
                                                               Func&& loop_body)
@@ -89,7 +89,7 @@ RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Resourc
     ib(begin_it[i]);
   }
 
-  return RAJA::resources::EventProxy<resources::Omp>(&res);
+  return resources::EventProxy<resources::Omp>(&omp_res);
 }
 
 }  // namespace omp
diff --git a/include/RAJA/policy/openmp_target/kernel/For.hpp b/include/RAJA/policy/openmp_target/kernel/For.hpp
index 4cfde34338..0d376390a6 100644
--- a/include/RAJA/policy/openmp_target/kernel/For.hpp
+++ b/include/RAJA/policy/openmp_target/kernel/For.hpp
@@ -53,7 +53,8 @@ struct StatementExecutor<statement::For<ArgumentId, omp_target_parallel_for_exe
     auto len = segment_length<ArgumentId>(data);
     using len_t = decltype(len);
 
-    forall_impl(omp_target_parallel_for_exec<ThreadsPerTeam>{}, TypedRangeSegment<len_t>(0, len), for_wrapper);
+    auto r = resources::Omp::get_default();
+    forall_impl(r, omp_target_parallel_for_exec<ThreadsPerTeam>{}, TypedRangeSegment<len_t>(0, len), for_wrapper);
   }
 };
diff --git a/include/RAJA/policy/openmp_target/policy.hpp b/include/RAJA/policy/openmp_target/policy.hpp
index 129a7ffbac..1246d2f745 100644
--- a/include/RAJA/policy/openmp_target/policy.hpp
+++ b/include/RAJA/policy/openmp_target/policy.hpp
@@ -30,6 +30,9 @@ struct Target {
 struct Distribute {
 };
 
+struct Collapse {
+};
+
 template <size_t ThreadsPerTeam>
 struct omp_target_parallel_for_exec
     : make_policy_pattern_t<

[...]

  struct get_resource{
-    using type = Host;
+    using type = camp::resources::Host;
  };
 
  template <typename T>
@@ -42,40 +45,66 @@ namespace RAJA
     return get_resource<T>::type::get_default();
  }
 
-#if defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
  template <size_t BLOCK_SIZE, bool Async>
  struct get_resource<cuda_exec<BLOCK_SIZE, Async>>{
-    using type = Cuda;
+    using type = camp::resources::Cuda;
  };
 
  template <typename ISET_ITER, size_t BLOCK_SIZE, bool Async>
  struct get_resource<ExecPolicy<ISET_ITER, cuda_exec<BLOCK_SIZE, Async>>>{
-    using type = Cuda;
+    using type = camp::resources::Cuda;
  };
 #endif
 
 #if defined(RAJA_ENABLE_HIP)
  template <size_t BLOCK_SIZE, bool Async>
  struct get_resource<hip_exec<BLOCK_SIZE, Async>>{
-    using type = Hip;
+    using type = camp::resources::Hip;
  };
 
  template <typename ISET_ITER, size_t BLOCK_SIZE, bool Async>
  struct get_resource<ExecPolicy<ISET_ITER, hip_exec<BLOCK_SIZE, Async>>>{
-    using type = Hip;
+    using type = camp::resources::Hip;
+  };
+#endif
+
+#if defined(RAJA_ENABLE_TARGET_OPENMP)
+  template<>
+  struct get_resource<omp_target_parallel_for_exec_nt>{
+    using type = camp::resources::Omp;
+  };
+
+  template <size_t ThreadsPerTeam>
+  struct get_resource<omp_target_parallel_for_exec<ThreadsPerTeam>>{
+    using type = camp::resources::Omp;
+  };
+
+  template <typename ISET_ITER>
+  struct get_resource<ExecPolicy<ISET_ITER, omp_target_parallel_for_exec_nt>>{
+    using type = camp::resources::Omp;
+  };
+
+  template <typename ISET_ITER, size_t ThreadsPerTeam>
+  struct get_resource<ExecPolicy<ISET_ITER, omp_target_parallel_for_exec<ThreadsPerTeam>>>{
+    using type = camp::resources::Omp;
  };
 #endif
 
 } // end namespace resources
 
 namespace type_traits
 {
  template <typename T> struct is_resource : std::false_type {};
  template <> struct is_resource<resources::Host> : std::true_type {};
-#if defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
  template <> struct is_resource<resources::Cuda> : std::true_type {};
 #endif
 #if defined(RAJA_ENABLE_HIP)
  template <> struct is_resource<resources::Hip> : std::true_type {};
+#endif
+#if defined(RAJA_ENABLE_TARGET_OPENMP)
+  template <> struct is_resource<resources::Omp> : std::true_type {};
 #endif
 } // end namespace type_traits
diff --git a/scripts/lc-builds/blueos_clang-ibm-2019.10.03_omptarget.sh b/scripts/lc-builds/blueos_clang-ibm-2019.10.03_omptarget.sh
index aeb1cb842d..4596023228 100755
--- a/scripts/lc-builds/blueos_clang-ibm-2019.10.03_omptarget.sh
+++ b/scripts/lc-builds/blueos_clang-ibm-2019.10.03_omptarget.sh
@@ -25,7 +25,7 @@ cmake \
   -DENABLE_TARGET_OPENMP=On \
   -DOpenMP_CXX_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \
   -DENABLE_ALL_WARNINGS=Off \
-  -DENABLE_EXAMPLES=Off \
+  -DENABLE_EXAMPLES=On \
   -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
   "$@" \
   ..
diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp
index 4ccf987e4a..0f12867cb8 100644
--- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp
+++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMax.hpp
@@ -50,7 +50,9 @@ void ForallReduceMaxMultipleTestImpl(IDX_TYPE first,
 
   DATA_TYPE current_max = default_val;
 
-  RAJA::ReduceMax<REDUCE_POLICY, DATA_TYPE> max0;
+  // Workaround for broken omp-target reduction interface.
+  // This should be `max0;` not `max0(0);`
+  RAJA::ReduceMax<REDUCE_POLICY, DATA_TYPE> max0(0);
   max0.reset(default_val);
   RAJA::ReduceMax<REDUCE_POLICY, DATA_TYPE> max1(default_val);
   RAJA::ReduceMax<REDUCE_POLICY, DATA_TYPE> max2(big_val);
diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp
index 0e40310251..85ba396222 100644
--- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp
+++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceMin.hpp
@@ -50,7 +50,9 @@ void ForallReduceMinMultipleTestImpl(IDX_TYPE first,
 
   DATA_TYPE current_min = default_val;
 
-  RAJA::ReduceMin<REDUCE_POLICY, DATA_TYPE> min0;
+  // Workaround for broken omp-target reduction interface.
+  // This should be `min0;` not `min0(0);`
+  RAJA::ReduceMin<REDUCE_POLICY, DATA_TYPE> min0(0);
   min0.reset(default_val);
   RAJA::ReduceMin<REDUCE_POLICY, DATA_TYPE> min1(default_val);
   RAJA::ReduceMin<REDUCE_POLICY, DATA_TYPE> min2(big_val);
diff --git a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp
index 2d47d851a2..a05a50765d 100644
--- a/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp
+++ b/test/functional/forall/reduce-multiple-segment/tests/test-forall-segment-multiple-ReduceSum.hpp
@@ -113,14 +113,16 @@ void ForallReduceSumMultipleStaggered2TestImpl(IDX_TYPE first,
 
   const DATA_TYPE index_len = static_cast<DATA_TYPE>(last - first);
 
+  // Workaround for broken omp-target reduction interface.
+  // This should be `sumX;` not `sumX(0);`
   RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum0(initval);
-  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum1;
+  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum1(0);
   RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum2(initval);
-  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum3;
+  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum3(0);
   RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum4(initval);
-  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum5;
+  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum5(0);
   RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum6(initval);
-  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum7;
+  RAJA::ReduceSum<REDUCE_POLICY, DATA_TYPE> sum7(0);
 
   sum0.reset(0);
   sum1.reset(initval * 1);
diff --git a/test/functional/teams/tests/test-teams-BasicShared.hpp b/test/functional/teams/tests/test-teams-BasicShared.hpp
index f2d0792c37..446fb9dac0 100644
--- a/test/functional/teams/tests/test-teams-BasicShared.hpp
+++ b/test/functional/teams/tests/test-teams-BasicShared.hpp
@@ -45,7 +45,7 @@ void TeamsBasicSharedTestImpl()
   RAJA::expt::loop(ctx, RAJA::RangeSegment(0, N), [&](int r) {
 
     // Array shared within threads of the same team
-    TEAM_SHARED int s_A[1];
+    RAJA_TEAM_SHARED int s_A[1];
 
     RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), [&](int c) {
       s_A[c] = r;
diff --git a/test/include/RAJA_test-workgroup.hpp b/test/include/RAJA_test-workgroup.hpp
index 4042418526..991fe5aeb3 100644
--- a/test/include/RAJA_test-workgroup.hpp
+++ b/test/include/RAJA_test-workgroup.hpp
@@ -105,7 +105,9 @@ struct NeverEqualAllocator
 
   ~NeverEqualAllocator()
   {
-    assert(m_allocations.empty());
+    if (!m_allocations.empty()) {
+      RAJA_ABORT_OR_THROW("allocation map not empty at destruction");
+    }
   }
 
   /*[[nodiscard]]*/
@@ -113,15 +115,21 @@ struct NeverEqualAllocator
   {
     void* ptr = malloc(size);
     auto iter_b = m_allocations.emplace(ptr, size);
-    assert(iter_b.second);
+    if (!iter_b.second) {
+      RAJA_ABORT_OR_THROW("failed to add allocation to map");
+    }
     return ptr;
   }
 
   void deallocate(void* ptr, size_t size) noexcept
   {
     auto iter = m_allocations.find(ptr);
-    assert(iter != m_allocations.end());
-    assert(iter->second == size);
+    if (iter == m_allocations.end()) {
+      RAJA_ABORT_OR_THROW("failed to find allocation in map");
+    }
+    if (iter->second != size) {
+      RAJA_ABORT_OR_THROW("allocation size does not match known in map");
+    }
     m_allocations.erase(iter);
     free(ptr);
   }
diff --git a/test/include/RAJA_unit-test-types.hpp b/test/include/RAJA_unit-test-types.hpp
index 9a51d23c9b..8c4924cf7b 100644
--- a/test/include/RAJA_unit-test-types.hpp
+++ b/test/include/RAJA_unit-test-types.hpp
@@ -51,10 +51,16 @@
 using UnitIntegralTypes = ::testing::Types<

[...]

-    CounterData i_data;
-    plugin_test_resource->memcpy(&i_data, m_data_iptr, sizeof(CounterData));
-
-    if (m_data.capture_platform_active == RAJA::Platform::undefined &&
-        i_data.capture_platform_active != RAJA::Platform::undefined) {
-      m_data = i_data;
+#if defined(RAJA_ENABLE_TARGET_OPENMP)
+    if (omp_is_initial_device())
+#endif
+    {
+      CounterData i_data;
+      plugin_test_resource->memcpy(&i_data, m_data_iptr, sizeof(CounterData));
+
+      if (m_data.capture_platform_active == RAJA::Platform::undefined &&
+          i_data.capture_platform_active != RAJA::Platform::undefined) {
+        m_data = i_data;
+      }
     }
 #endif
 }
diff --git a/test/old-tests/unit/omp-target/CMakeLists.txt b/test/old-tests/unit/omp-target/CMakeLists.txt
index 39b8010f10..3ad9eca426 100644
--- a/test/old-tests/unit/omp-target/CMakeLists.txt
+++ b/test/old-tests/unit/omp-target/CMakeLists.txt
@@ -18,7 +18,4 @@ if(ENABLE_TARGET_OPENMP)
   raja_add_test(
     NAME test-omp-target-reduce-tuplemaxloc
     SOURCES test-reduce-tuplemaxloc.cpp)
-  raja_add_test(
-    NAME test-omp-target-kernel
-    SOURCES kernel.cpp)
 endif(ENABLE_TARGET_OPENMP)
diff --git a/test/old-tests/unit/omp-target/kernel.cpp b/test/old-tests/unit/omp-target/kernel.cpp
deleted file mode 100644
index 4d990f77e3..0000000000
--- a/test/old-tests/unit/omp-target/kernel.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
-// Copyright (c) 2016-20, Lawrence Livermore National Security, LLC
-// and RAJA project contributors. See the RAJA/COPYRIGHT file for details.
-//
-// SPDX-License-Identifier: (BSD-3-Clause)
-//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
-
-#include "RAJA/RAJA.hpp"
-#include "RAJA_gtest.hpp"
-
-using namespace RAJA;
-using namespace RAJA::statement;
-
-TEST(Kernel, omptarget)
-{
-
-  using Pol = RAJA::KernelPolicy<
-      For<0, RAJA::omp_target_parallel_for_exec<64> >,
-      For<1, RAJA::loop_exec>
-      >;
-
-  double* array = new double[25*25];
-
-#pragma omp target enter data map(to: array[0:25*25])
-#pragma omp target data use_device_ptr(array)
-
-  RAJA::kernel<Pol>(
-      RAJA::make_tuple(
-          RAJA::RangeSegment(0,25),
-          RAJA::RangeSegment(0,25)),
-      [=] (int i, int j) {
-        //array[i + (25*j)] = i*j;
-        int idx = i*j;
-
-        //array[0] = i*j;
-      });
-
-
-//#pragma omp target update from(array[:25*25])
-//  for (int i = 0; i < 25*25; i++) {
-//    std::cout << i << "=" << array[i] << std::endl;
-//  }
-}
-