diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 9af5ba72b..65d835370 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -27,6 +27,8 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -# With GitLab CI, included files cannot be empty. -variables: - INCLUDED_FILE_CANNOT_BE_EMPTY: "True" +clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: + variables: + SPEC: " ~shared +sycl ~openmp tests=basic %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + MODULE_LIST: "rocm/5.7.1" + extends: .job_on_corona diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 1b9bc0eda..ef997eb6d 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -18,14 +18,7 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# Overriding shared spec: Longer allocation + extra flags -xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: - variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" - MODULE_LIST: "cuda/11.2.0" - LASSEN_JOB_ALLOC: "1 -W 60 -q pci" - extends: .job_on_lassen - +# No jobs overridden ############ # Extra jobs @@ -36,7 +29,7 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: gcc_8_3_1: variables: - SPEC: " ~shared +openmp %gcc@=8.3.1 ${PROJECT_LASSEN_DEPS}" + SPEC: " ~shared +openmp %gcc@=8.3.1 ^blt@develop" extends: .job_on_lassen gcc_8_3_1_cuda_11_5_0_ats_disabled: @@ -69,8 +62,23 @@ clang_13_0_1_libcpp: # LSAN_OPTIONS: "suppressions=${CI_PROJECT_DIR}/tpl/RAJA/suppressions.asan" # extends: .job_on_lassen -# Activated in RAJA, but we don't use desul atomics here -#gcc_8_3_1_cuda_10_1_168_desul_atomics: -# variables: -# SPEC: "+openmp +cuda +desul %gcc@=8.3.1 cuda_arch=70 cuda_arch=70 ^cuda@10.1.243+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" -# extends: .job_on_lassen +clang_16_0_6_ibm_omptarget: + variables: + SPEC: " ~shared +openmp +omptarget %clang@=16.0.6.ibm.gcc.8.3.1 ^blt@develop" + ON_LASSEN: "OFF" + extends: .job_on_lassen + +xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: + variables: + SPEC: " ~shared +openmp cuda_arch=70 +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.2.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + +xl_2023_06_28_gcc_11_2_1_cuda_11_8_0: + variables: + SPEC: " ~shared +openmp cuda_arch=70 +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.14.cuda.11.8.0.gcc.11.2.1 ^cuda@11.8.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.8.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 8e86158f0..cdd0b018d 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -18,27 +18,24 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. +# custom variant +intel_2023_2_1: + variables: + SPEC: "${PROJECT_POODLE_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + extends: .job_on_poodle + +# omptask variant clang_14_0_6: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %clang@=14.0.6 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle +# omptask variant gcc_10_3_1: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_POODLE_DEPS}" - extends: .job_on_poodle - -intel_2022_1_0: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" - allow_failure: true - extends: .job_on_poodle - ############ # Extra jobs ############ @@ -46,8 +43,7 @@ intel_2022_1_0: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2022_1_0_mpi: +intel_2023_2_1_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" - allow_failure: true + SPEC: "~shared +openmp +mpi +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ^mvapich2 ^blt@develop" extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index c19e36d12..4d74eec78 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -18,28 +18,25 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. +# custom variant +intel_2023_2_1: + variables: + SPEC: "${PROJECT_RUBY_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + extends: .job_on_ruby + +# omptask variant clang_14_0_6: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +omptask %clang@=14.0.6 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby +# omptask variant gcc_10_3_1: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .job_on_ruby -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_RUBY_DEPS}" - RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" - extends: .job_on_ruby - -intel_2022_1_0: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" - extends: .job_on_ruby - ############ # Extra jobs ############ @@ -47,7 +44,7 @@ intel_2022_1_0: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2022_1_0_mpi: +intel_2023_2_1_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" + SPEC: "~shared +openmp +mpi +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ^mvapich2 ^blt@develop" extends: .job_on_ruby diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 00ed3c276..d8a43062a 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -27,13 +27,12 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_1_1_hip_openmp: +rocmcc_6_2_0_hip_openmp: variables: - SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga -rocmcc_6_1_1_hip_openmp_mpi: +rocmcc_6_2_0_hip_openmp_mpi: variables: - SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga - allow_failure: true diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml index 7e60a05e9..2f0a610df 100644 --- a/.gitlab/subscribed-pipelines.yml +++ b/.gitlab/subscribed-pipelines.yml @@ -113,5 +113,3 @@ lassen-build-and-test: CI_MACHINE: "lassen" needs: [lassen-up-check, generate-job-lists] extends: [.build-and-test] - - diff --git a/.uberenv_config.json b/.uberenv_config.json index fda595d3a..5c3fc32d8 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -1,10 +1,10 @@ { -"package_name" : "raja_perf", +"package_name" : "raja-perf", "package_version" : "develop", "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "develop-2024-05-26", +"spack_branch": "develop-2024-10-06", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/CMakeLists.txt b/CMakeLists.txt index 978c7ccf5..13da85d30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ endif() option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off) -if (ENABLE_KOKKOS OR ENABLE_SYCL) +if (ENABLE_KOKKOS OR RAJA_ENABLE_SYCL) set(CMAKE_CXX_STANDARD 17) set(BLT_CXX_STD c++17) else() @@ -155,7 +155,7 @@ endif() if (ENABLE_CUDA) list(APPEND RAJA_PERFSUITE_DEPENDS cuda) endif() -if (ENABLE_SYCL) +if (RAJA_ENABLE_SYCL) list(APPEND RAJA_PERFSUITE_DEPENDS sycl) endif() diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index f2d020918..984c834d4 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -28,6 +28,7 @@ job_unique_id=${CI_JOB_ID:-""} use_dev_shm=${USE_DEV_SHM:-true} spack_debug=${SPACK_DEBUG:-false} debug_mode=${DEBUG_MODE:-false} +push_to_registry=${PUSH_TO_REGISTRY:-true} raja_version=${UPDATE_RAJA:-""} sys_type=${SYS_TYPE:-""} @@ -59,6 +60,7 @@ then echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" use_dev_shm=false spack_debug=true + push_to_registry=false fi if [[ -n ${module_list} ]] @@ -149,7 +151,7 @@ then timed_message "Spack build of dependencies" ${uberenv_cmd} --skip-setup-and-env --spec="${spec}" ${prefix_opt} ${upstream_opt} - if [[ -n ${ci_registry_token} && ${debug_mode} == false ]] + if [[ -n ${ci_registry_token} && ${push_to_registry} == true ]] then timed_message "Push dependencies to buildcache" ${spack_cmd} -D ${spack_env_path} buildcache push --only dependencies gitlab_ci @@ -239,7 +241,7 @@ then fi date - if [[ "${truehostname}" == "corona" || "${truehostname}" == "tioga" ]] + if [[ "${truehostname}" == "tioga" ]] then module unload rocm fi diff --git a/src/algorithm/REDUCE_SUM-Cuda.cpp b/src/algorithm/REDUCE_SUM-Cuda.cpp index 302ab35d6..836089ab6 100644 --- a/src/algorithm/REDUCE_SUM-Cuda.cpp +++ b/src/algorithm/REDUCE_SUM-Cuda.cpp @@ -239,7 +239,8 @@ void REDUCE_SUM::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] __device__ (Index_type i, Real_type& sum) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Hip.cpp b/src/algorithm/REDUCE_SUM-Hip.cpp index 831978015..f7c689593 100644 --- a/src/algorithm/REDUCE_SUM-Hip.cpp +++ b/src/algorithm/REDUCE_SUM-Hip.cpp @@ -266,7 +266,8 @@ void REDUCE_SUM::runHipVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] __device__ (Index_type i, Real_type& sum) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-OMP.cpp b/src/algorithm/REDUCE_SUM-OMP.cpp index 66c0fcb63..0c89c80b9 100644 --- a/src/algorithm/REDUCE_SUM-OMP.cpp +++ b/src/algorithm/REDUCE_SUM-OMP.cpp @@ -106,7 +106,8 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-OMPTarget.cpp b/src/algorithm/REDUCE_SUM-OMPTarget.cpp index a1d19fcc9..0ce3cfbae 100644 --- a/src/algorithm/REDUCE_SUM-OMPTarget.cpp +++ b/src/algorithm/REDUCE_SUM-OMPTarget.cpp @@ -66,7 +66,8 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_AR RAJA::forall>( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Seq.cpp b/src/algorithm/REDUCE_SUM-Seq.cpp index d5259f6ae..b760eb67c 100644 --- a/src/algorithm/REDUCE_SUM-Seq.cpp +++ b/src/algorithm/REDUCE_SUM-Seq.cpp @@ -106,7 +106,8 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Sycl.cpp b/src/algorithm/REDUCE_SUM-Sycl.cpp index 516048863..810a71bf2 100644 --- a/src/algorithm/REDUCE_SUM-Sycl.cpp +++ b/src/algorithm/REDUCE_SUM-Sycl.cpp @@ -76,11 +76,13 @@ void REDUCE_SUM::runSyclVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { Real_type tsum = m_sum_init; + RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-Cuda.cpp b/src/basic/PI_REDUCE-Cuda.cpp index 8529897c3..449c0b634 100644 --- a/src/basic/PI_REDUCE-Cuda.cpp +++ b/src/basic/PI_REDUCE-Cuda.cpp @@ -168,7 +168,8 @@ void PI_REDUCE::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall< exec_policy >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] __device__ (Index_type i, Real_type& pi) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Hip.cpp b/src/basic/PI_REDUCE-Hip.cpp index ed2dfd8dd..2db8c8c98 100644 --- a/src/basic/PI_REDUCE-Hip.cpp +++ b/src/basic/PI_REDUCE-Hip.cpp @@ -168,7 +168,8 @@ void PI_REDUCE::runHipVariantRAJANewReduce(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] __device__ (Index_type i, Real_type& pi) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-OMP.cpp b/src/basic/PI_REDUCE-OMP.cpp index 21e93f080..4610d0831 100644 --- a/src/basic/PI_REDUCE-OMP.cpp +++ b/src/basic/PI_REDUCE-OMP.cpp @@ -107,7 +107,8 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-OMPTarget.cpp b/src/basic/PI_REDUCE-OMPTarget.cpp index 32a502878..f34c3bbc9 100644 --- a/src/basic/PI_REDUCE-OMPTarget.cpp +++ b/src/basic/PI_REDUCE-OMPTarget.cpp @@ -66,7 +66,8 @@ void PI_REDUCE::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG RAJA::forall>( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Seq.cpp b/src/basic/PI_REDUCE-Seq.cpp index 3ca82eabe..fae77baf6 100644 --- a/src/basic/PI_REDUCE-Seq.cpp +++ b/src/basic/PI_REDUCE-Seq.cpp @@ -107,7 +107,8 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Sycl.cpp b/src/basic/PI_REDUCE-Sycl.cpp index c95e29583..3f09ffdf7 100644 --- a/src/basic/PI_REDUCE-Sycl.cpp +++ b/src/basic/PI_REDUCE-Sycl.cpp @@ -87,7 +87,8 @@ void PI_REDUCE::runSyclVariantImpl(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-Cuda.cpp b/src/basic/REDUCE3_INT-Cuda.cpp index a8d68b31c..cf7bb9716 100644 --- a/src/basic/REDUCE3_INT-Cuda.cpp +++ b/src/basic/REDUCE3_INT-Cuda.cpp @@ -194,8 +194,10 @@ void REDUCE3_INT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), [=] __device__ (Index_type i, - Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Hip.cpp b/src/basic/REDUCE3_INT-Hip.cpp index 12d172de7..f28aecc5b 100644 --- a/src/basic/REDUCE3_INT-Hip.cpp +++ b/src/basic/REDUCE3_INT-Hip.cpp @@ -194,8 +194,10 @@ void REDUCE3_INT::runHipVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), [=] __device__ (Index_type i, - Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-OMP.cpp b/src/basic/REDUCE3_INT-OMP.cpp index e8d328696..6cb8e43ef 100644 --- a/src/basic/REDUCE3_INT-OMP.cpp +++ b/src/basic/REDUCE3_INT-OMP.cpp @@ -128,8 +128,11 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-OMPTarget.cpp b/src/basic/REDUCE3_INT-OMPTarget.cpp index d0dc92f73..ffc2d9f9c 100644 --- a/src/basic/REDUCE3_INT-OMPTarget.cpp +++ b/src/basic/REDUCE3_INT-OMPTarget.cpp @@ -76,8 +76,11 @@ void REDUCE3_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_A RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Seq.cpp b/src/basic/REDUCE3_INT-Seq.cpp index b4c27ef91..5a733fab7 100644 --- a/src/basic/REDUCE3_INT-Seq.cpp +++ b/src/basic/REDUCE3_INT-Seq.cpp @@ -124,8 +124,11 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Sycl.cpp b/src/basic/REDUCE3_INT-Sycl.cpp index 58ac6f082..dbf81acaa 100644 --- a/src/basic/REDUCE3_INT-Sycl.cpp +++ b/src/basic/REDUCE3_INT-Sycl.cpp @@ -110,8 +110,11 @@ void REDUCE3_INT::runSyclVariantImpl(VariantID vid) RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=] (Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=] (Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Cuda.cpp b/src/basic/REDUCE_STRUCT-Cuda.cpp index daccf30b5..d726129d8 100644 --- a/src/basic/REDUCE_STRUCT-Cuda.cpp +++ b/src/basic/REDUCE_STRUCT-Cuda.cpp @@ -241,10 +241,14 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=] __device__ (Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Hip.cpp b/src/basic/REDUCE_STRUCT-Hip.cpp index 17fe5ad83..cac5a2989 100644 --- a/src/basic/REDUCE_STRUCT-Hip.cpp +++ b/src/basic/REDUCE_STRUCT-Hip.cpp @@ -241,10 +241,14 @@ void REDUCE_STRUCT::runHipVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=] __device__ (Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMP.cpp b/src/basic/REDUCE_STRUCT-OMP.cpp index 7db18c01f..6086f0770 100644 --- a/src/basic/REDUCE_STRUCT-OMP.cpp +++ b/src/basic/REDUCE_STRUCT-OMP.cpp @@ -160,10 +160,14 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMPTarget.cpp b/src/basic/REDUCE_STRUCT-OMPTarget.cpp index 6618cc179..22de4dcdb 100644 --- a/src/basic/REDUCE_STRUCT-OMPTarget.cpp +++ b/src/basic/REDUCE_STRUCT-OMPTarget.cpp @@ -103,10 +103,14 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Seq.cpp b/src/basic/REDUCE_STRUCT-Seq.cpp index af82b4e29..9a4a2030e 100644 --- a/src/basic/REDUCE_STRUCT-Seq.cpp +++ b/src/basic/REDUCE_STRUCT-Seq.cpp @@ -150,10 +150,14 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/TRAP_INT-Cuda.cpp b/src/basic/TRAP_INT-Cuda.cpp index e58e86923..717fef6f5 100644 --- a/src/basic/TRAP_INT-Cuda.cpp +++ b/src/basic/TRAP_INT-Cuda.cpp @@ -175,7 +175,8 @@ void TRAP_INT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] __device__ (Index_type i, Real_type& sumx) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Hip.cpp b/src/basic/TRAP_INT-Hip.cpp index e60b3ccff..de3140258 100644 --- a/src/basic/TRAP_INT-Hip.cpp +++ b/src/basic/TRAP_INT-Hip.cpp @@ -176,7 +176,8 @@ void TRAP_INT::runHipVariantRAJANewReduce(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] __device__ (Index_type i, Real_type& sumx) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-OMP.cpp b/src/basic/TRAP_INT-OMP.cpp index 0725b1af4..cbff58217 100644 --- a/src/basic/TRAP_INT-OMP.cpp +++ b/src/basic/TRAP_INT-OMP.cpp @@ -108,7 +108,8 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-OMPTarget.cpp b/src/basic/TRAP_INT-OMPTarget.cpp index dc4753cf1..cd98d145c 100644 --- a/src/basic/TRAP_INT-OMPTarget.cpp +++ b/src/basic/TRAP_INT-OMPTarget.cpp @@ -73,7 +73,8 @@ void TRAP_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::forall>( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Seq.cpp b/src/basic/TRAP_INT-Seq.cpp index 1b9fa98fa..69f4f0f88 100644 --- a/src/basic/TRAP_INT-Seq.cpp +++ b/src/basic/TRAP_INT-Seq.cpp @@ -108,7 +108,8 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Sycl.cpp b/src/basic/TRAP_INT-Sycl.cpp index a9795c77e..b1ce89d9b 100644 --- a/src/basic/TRAP_INT-Sycl.cpp +++ b/src/basic/TRAP_INT-Sycl.cpp @@ -85,7 +85,8 @@ void TRAP_INT::runSyclVariantImpl(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-Cuda.cpp b/src/lcals/FIRST_MIN-Cuda.cpp index 08f2ab240..3e7de06ef 100644 --- a/src/lcals/FIRST_MIN-Cuda.cpp +++ b/src/lcals/FIRST_MIN-Cuda.cpp @@ -114,8 +114,6 @@ void FIRST_MIN::runCudaVariantBase(VariantID vid) template < size_t block_size, typename MappingHelper > void FIRST_MIN::runCudaVariantRAJA(VariantID vid) { - using reduction_policy = RAJA::cuda_reduce; - using exec_policy = std::conditional_t, RAJA::cuda_exec_occ_calc>; @@ -133,15 +131,16 @@ void FIRST_MIN::runCudaVariantRAJA(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); @@ -168,22 +167,23 @@ void FIRST_MIN::runCudaVariantRAJANewReduce(VariantID vid) if ( vid == RAJA_CUDA ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] __device__ (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Hip.cpp b/src/lcals/FIRST_MIN-Hip.cpp index 3c6fd7b35..dda9793bf 100644 --- a/src/lcals/FIRST_MIN-Hip.cpp +++ b/src/lcals/FIRST_MIN-Hip.cpp @@ -114,8 +114,6 @@ void FIRST_MIN::runHipVariantBase(VariantID vid) template < size_t block_size, typename MappingHelper > void FIRST_MIN::runHipVariantRAJA(VariantID vid) { - using reduction_policy = RAJA::hip_reduce; - using exec_policy = std::conditional_t, RAJA::hip_exec_occ_calc>; @@ -133,15 +131,16 @@ void FIRST_MIN::runHipVariantRAJA(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); @@ -168,22 +167,23 @@ void FIRST_MIN::runHipVariantRAJANewReduce(VariantID vid) if ( vid == RAJA_HIP ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); - RAJA::forall( res, - RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] __device__ (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); - } - ); - - m_minloc = static_cast(tloc.getLoc()); + RAJA::forall( res, + RAJA::RangeSegment(ibegin, iend), + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; + } + ); + + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index e08b2918a..7f2b9d3f9 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -94,37 +94,39 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); } else if (tune_idx == 1) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMPTarget.cpp b/src/lcals/FIRST_MIN-OMPTarget.cpp index 355aef7ec..7c4432d2a 100644 --- a/src/lcals/FIRST_MIN-OMPTarget.cpp +++ b/src/lcals/FIRST_MIN-OMPTarget.cpp @@ -62,22 +62,23 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG auto res{getOmpTargetResource()}; - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall>( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Seq.cpp b/src/lcals/FIRST_MIN-Seq.cpp index bcee68750..abe14957f 100644 --- a/src/lcals/FIRST_MIN-Seq.cpp +++ b/src/lcals/FIRST_MIN-Seq.cpp @@ -86,37 +86,39 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); } else if (tune_idx == 1) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Sycl.cpp b/src/lcals/FIRST_MIN-Sycl.cpp index 616c84dcb..dddcc5aae 100644 --- a/src/lcals/FIRST_MIN-Sycl.cpp +++ b/src/lcals/FIRST_MIN-Sycl.cpp @@ -84,23 +84,24 @@ void FIRST_MIN::runSyclVariantImpl(VariantID vid) } else if ( vid == RAJA_SYCL ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] (Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index a0d4a6786..f00100385 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -33,7 +33,7 @@ } #define FIRST_MIN_BODY_RAJA \ - loc.minloc(x[i], i); + minloc.minloc(x[i], i); #include "common/RPTypes.hpp" diff --git a/src/stream/DOT-Cuda.cpp b/src/stream/DOT-Cuda.cpp index 031355a3e..c8910ee8c 100644 --- a/src/stream/DOT-Cuda.cpp +++ b/src/stream/DOT-Cuda.cpp @@ -164,7 +164,8 @@ void DOT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] __device__ (Index_type i, Real_type& dot) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Hip.cpp b/src/stream/DOT-Hip.cpp index 0c3c914a9..24984f300 100644 --- a/src/stream/DOT-Hip.cpp +++ b/src/stream/DOT-Hip.cpp @@ -164,7 +164,8 @@ void DOT::runHipVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] __device__ (Index_type i, Real_type& dot) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-OMP.cpp b/src/stream/DOT-OMP.cpp index 6492ce545..57c8d70d0 100644 --- a/src/stream/DOT-OMP.cpp +++ b/src/stream/DOT-OMP.cpp @@ -105,7 +105,8 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-OMPTarget.cpp b/src/stream/DOT-OMPTarget.cpp index b16b77ccd..cf9a0ab6b 100644 --- a/src/stream/DOT-OMPTarget.cpp +++ b/src/stream/DOT-OMPTarget.cpp @@ -70,7 +70,8 @@ void DOT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ RAJA::forall>( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Seq.cpp b/src/stream/DOT-Seq.cpp index 1a622d658..b98d78c42 100644 --- a/src/stream/DOT-Seq.cpp +++ b/src/stream/DOT-Seq.cpp @@ -105,7 +105,8 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Sycl.cpp b/src/stream/DOT-Sycl.cpp index 250f0b680..4f3fb40f5 100644 --- a/src/stream/DOT-Sycl.cpp +++ b/src/stream/DOT-Sycl.cpp @@ -76,11 +76,13 @@ void DOT::runSyclVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { Real_type tdot = m_dot_init; + RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/tpl/RAJA b/tpl/RAJA index 378199aac..2fcd22ee2 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 378199aac342ee21c2ddfbcbb48413bd1dfac612 +Subproject commit 2fcd22ee2a1c62747b6481d6331dfae6526adc32