From fad8028aa363b7e4d265cc7eb851d742723b80aa Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 30 Aug 2024 15:23:48 -0700 Subject: [PATCH 01/49] Update GitLab CI content to match Adrien's RAJA PR --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 378199aac..9472f243b 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 378199aac342ee21c2ddfbcbb48413bd1dfac612 +Subproject commit 9472f243bc116660a33e5ee4ac1a19e2b18a13e1 From 1c960290903ef3a37c25840db945e2f0de5ff9d2 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 30 Aug 2024 15:27:10 -0700 Subject: [PATCH 02/49] Update files used in GitLab CI --- .gitlab/jobs/corona.yml | 8 +++++--- .gitlab/jobs/lassen.yml | 10 +++++----- .gitlab/jobs/poodle.yml | 5 ----- .gitlab/jobs/ruby.yml | 6 ------ .gitlab/jobs/tioga.yml | 8 ++++---- .uberenv_config.json | 2 +- 6 files changed, 15 insertions(+), 24 deletions(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 8fec233c5..c76eb9389 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -29,6 +29,8 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -# With GitLab CI, included files cannot be empty. -variables: - INCLUDED_FILE_CANNOT_BE_EMPTY: "True" +clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: + variables: + SPEC: " ~shared +sycl +openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + MODULE_LIST: "rocm/5.7.1" + extends: .job_on_corona diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index c6eacf864..2748dc48f 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -71,8 +71,8 @@ clang_13_0_1_libcpp: # LSAN_OPTIONS: "suppressions=${CI_PROJECT_DIR}/tpl/RAJA/suppressions.asan" # extends: .job_on_lassen -# Activated in RAJA, but we don't use desul atomics here -#gcc_8_3_1_cuda_10_1_168_desul_atomics: -# variables: -# SPEC: "+openmp +cuda +desul %gcc@=8.3.1 cuda_arch=70 cuda_arch=70 ^cuda@10.1.243+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" -# extends: .job_on_lassen +clang_16_0_6_ibm_omptarget: + variables: + SPEC: " ~shared +openmp +omptarget %clang@=16.0.6.ibm.gcc.8.3.1 ^blt@develop" + ON_LASSEN: "OFF" + extends: .job_on_lassen diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index ed18f60f5..2a5ceab77 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -30,11 +30,6 @@ gcc_10_3_1: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_POODLE_DEPS}" - extends: .job_on_poodle - intel_2022_1_0: variables: SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 3502ed3fb..733c262aa 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -31,12 +31,6 @@ gcc_10_3_1: RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .job_on_ruby -intel_19_1_2_gcc_10_3_1: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_RUBY_DEPS}" - RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" - extends: .job_on_ruby - intel_2022_1_0: variables: SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index bcf9eccb8..b4265bd2f 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -29,13 +29,13 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_1_1_hip_openmp: +rocmcc_6_2_0_hip_openmp: variables: - SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ${PROJECT_TIOGA_DEPS}" + SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ${PROJECT_TIOGA_DEPS}" extends: .job_on_tioga -rocmcc_6_1_1_hip_openmp_mpi: +rocmcc_6_2_0_hip_openmp_mpi: variables: - SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ${PROJECT_TIOGA_DEPS}" + SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ${PROJECT_TIOGA_DEPS}" extends: .job_on_tioga allow_failure: true diff --git a/.uberenv_config.json b/.uberenv_config.json index fda595d3a..f0680e500 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "develop-2024-05-26", +"spack_branch": "develop-2024-07-07", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", From 1e1535cf9211dbeb703fe3f117a848521cb8d662 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 4 Sep 2024 10:10:04 +0200 Subject: [PATCH 03/49] Update RAJA --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 9472f243b..6b30e1c7f 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 9472f243bc116660a33e5ee4ac1a19e2b18a13e1 +Subproject commit 6b30e1c7f6cbc7ab2ce888c9f0fddda6eec8d824 From 5676a48d770e5b44c245bf955e10fc199829e68b Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 18 Sep 2024 22:58:15 +0200 Subject: [PATCH 04/49] Update lassen jobs w.r.t. changes in radiuss-spack-configs --- .gitlab/jobs/lassen.yml | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 2748dc48f..8ccb69766 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -20,14 +20,7 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# Overriding shared spec: Longer allocation + extra flags -xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: - variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" - MODULE_LIST: "cuda/11.2.0" - LASSEN_JOB_ALLOC: "1 -W 60 -q pci" - extends: .job_on_lassen - +# No jobs overridden ############ # Extra jobs @@ -38,20 +31,20 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: gcc_8_3_1: variables: - SPEC: " ~shared +openmp %gcc@=8.3.1 ${PROJECT_LASSEN_DEPS}" + SPEC: " ~shared +openmp %gcc@=8.3.1 ^blt@develop" extends: .job_on_lassen gcc_8_3_1_cuda_11_5_0_ats_disabled: extends: .job_on_lassen variables: - SPEC: " ~shared +openmp +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" + SPEC: " ~shared +openmp +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^blt@develop" MODULE_LIST: "cuda/11.5.0" LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" gcc_8_3_1_cuda_11_5_0_ats_disabled_mpi: extends: .job_on_lassen variables: - SPEC: " ~shared +openmp +cuda +mpi %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^spectrum-mpi ${PROJECT_LASSEN_DEPS}" + SPEC: " ~shared +openmp +cuda +mpi %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^spectrum-mpi ^blt@develop" MODULE_LIST: "cuda/11.5.0" LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" @@ -61,7 +54,7 @@ gcc_8_3_1_cuda_11_5_0_ats_disabled_mpi: clang_13_0_1_libcpp: variables: - SPEC: " ~shared +openmp %clang@=13.0.1 cflags==\"-DGTEST_HAS_CXXABI_H_=0\" cxxflags==\"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0\" ${PROJECT_LASSEN_DEPS}" + SPEC: " ~shared +openmp %clang@=13.0.1 cflags==\"-DGTEST_HAS_CXXABI_H_=0\" cxxflags==\"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0\" ^blt@develop" extends: .job_on_lassen #clang_14_0_5_asan: @@ -76,3 +69,18 @@ clang_16_0_6_ibm_omptarget: SPEC: " ~shared +openmp +omptarget %clang@=16.0.6.ibm.gcc.8.3.1 ^blt@develop" ON_LASSEN: "OFF" extends: .job_on_lassen + +xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: + variables: + SPEC: " ~shared +openmp cuda_arch=70 +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.2.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + +xl_2023_06_28_gcc_11_2_1_cuda_11_8_0: + variables: + SPEC: " ~shared +openmp cuda_arch=70 +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.14.cuda.11.8.0.gcc.11.2.1 ^cuda@11.8.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.8.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + From ea7a9de7adfb9e43a3ea5bfa770a515c96b3736a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 18 Sep 2024 23:01:42 +0200 Subject: [PATCH 05/49] Update and standardize RADIUSS packages --- .uberenv_config.json | 2 +- tpl/RAJA | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.uberenv_config.json b/.uberenv_config.json index f0680e500..0a52d9d81 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "develop-2024-07-07", +"spack_branch": "woptim/radiuss-packages-sync", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/tpl/RAJA b/tpl/RAJA index 6b30e1c7f..8a083ddc0 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 6b30e1c7f6cbc7ab2ce888c9f0fddda6eec8d824 +Subproject commit 8a083ddc08ffd82845abd374c9510eeca95c63a9 From 5d0fcd122e9d95b570f4dc15ed68ca907f49f1ce Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:47:12 +0200 Subject: [PATCH 06/49] build_and_test.sh: allow to control whether or not to push to registry --- scripts/gitlab/build_and_test.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index f2d020918..44a89a1f3 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -28,6 +28,7 @@ job_unique_id=${CI_JOB_ID:-""} use_dev_shm=${USE_DEV_SHM:-true} spack_debug=${SPACK_DEBUG:-false} debug_mode=${DEBUG_MODE:-false} +push_to_registry=${PUSH_TO_REGISTRY:-true} raja_version=${UPDATE_RAJA:-""} sys_type=${SYS_TYPE:-""} @@ -59,6 +60,7 @@ then echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" use_dev_shm=false spack_debug=true + push_to_registry=false fi if [[ -n ${module_list} ]] @@ -149,7 +151,7 @@ then timed_message "Spack build of dependencies" ${uberenv_cmd} --skip-setup-and-env --spec="${spec}" ${prefix_opt} ${upstream_opt} - if [[ -n ${ci_registry_token} && ${debug_mode} == false ]] + if [[ -n ${ci_registry_token} && ${push_to_registry} == true ]] then timed_message "Push dependencies to buildcache" ${spack_cmd} -D ${spack_env_path} buildcache push --only dependencies gitlab_ci From 77cf546d3760f1dc03725e8ff67041f1d0399c30 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:50:13 +0200 Subject: [PATCH 07/49] From RAJA: Update RSC with fixes --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 8a083ddc0..85a7d0ee9 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 8a083ddc08ffd82845abd374c9510eeca95c63a9 +Subproject commit 85a7d0ee962fee6115a6804f1ff9fa5ad70beec1 From 3ee1f8f8a97978ba9594998a06a7c90924296eba Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 19 Sep 2024 21:07:40 +0200 Subject: [PATCH 08/49] Fix package name --- .uberenv_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.uberenv_config.json b/.uberenv_config.json index 0a52d9d81..da25704e2 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -1,5 +1,5 @@ { -"package_name" : "raja_perf", +"package_name" : "raja-perf", "package_version" : "develop", "package_final_phase" : "initconfig", "package_source_dir" : "../..", From 99dd23dd7ceba3aa5373f61ca0f49a0860ddcf1a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:12:32 +0200 Subject: [PATCH 09/49] Update RAJA --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 85a7d0ee9..b9edfea5f 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 85a7d0ee962fee6115a6804f1ff9fa5ad70beec1 +Subproject commit b9edfea5f31b149fa2fc2fece2a4773d9c4d64b2 From 6a82d5207a2fca55798b69a727d52e276f7eb3b7 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:35:55 +0200 Subject: [PATCH 10/49] Fix Spack branch name --- .uberenv_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.uberenv_config.json b/.uberenv_config.json index da25704e2..e15410b55 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "woptim/radiuss-packages-sync", +"spack_branch": "woptim/radiuss-packages-update", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", From c06f8c880bfc0c7b4746287e6bb821ec7de87323 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:29:10 +0200 Subject: [PATCH 11/49] From RAJA: From RSC: Add missing sycl variant to RAJAPerf and fix c++17 logic --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index b9edfea5f..ba6c7760d 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit b9edfea5f31b149fa2fc2fece2a4773d9c4d64b2 +Subproject commit ba6c7760db27b9e856223bc9bcded238d3f3ce1d From f7653e11e6672f46807d0eebc510691d1957216d Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:20:51 +0200 Subject: [PATCH 12/49] Fix tests variant value in sycl spec --- .gitlab/jobs/corona.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 49baa0984..664fadab7 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -29,6 +29,6 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: variables: - SPEC: " ~shared +sycl +openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + SPEC: " ~shared +sycl +openmp tests=benchmarks %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona From a27dcfe6e6242e1e9acc4757a5a0f64a8a9d0f8d Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:55:00 +0200 Subject: [PATCH 13/49] Turn off openmp support in sycl job on corona (same as RAJA CI) --- .gitlab/jobs/corona.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 664fadab7..2624cec45 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -29,6 +29,6 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: variables: - SPEC: " ~shared +sycl +openmp tests=benchmarks %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + SPEC: " ~shared +sycl ~openmp tests=benchmarks %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona From e3267f70571e4233d88208ece18f2c90ffd1feab Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:45:21 +0200 Subject: [PATCH 14/49] turn off benchmarks --- .gitlab/jobs/corona.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 2624cec45..67f9b1d49 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -29,6 +29,6 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: variables: - SPEC: " ~shared +sycl ~openmp tests=benchmarks %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + SPEC: " ~shared +sycl ~openmp %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona From aae34ed29693067f07950d1f04f8165e2663344f Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:13:53 +0200 Subject: [PATCH 15/49] Allow failure of sycl job on corona for now --- .gitlab/jobs/corona.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 67f9b1d49..c1fdc1c53 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -32,3 +32,4 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: SPEC: " ~shared +sycl ~openmp %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona + allow_failure: true From 98e70e486a2154d2a9a6fa67e1ec56faa2c777ee Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:03:25 +0200 Subject: [PATCH 16/49] From RAJA: From RSC: RAJAPerf: Fix CMake variable for sycl support --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index ba6c7760d..0373c0225 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit ba6c7760db27b9e856223bc9bcded238d3f3ce1d +Subproject commit 0373c02255f9a4ed2ad5bc25d97d6d05fb74fc5f From 83e41243cc685fe5067f7e9be97407ba9b086c5c Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 1 Oct 2024 21:22:25 +0200 Subject: [PATCH 17/49] Update Spack ref (waiting for next snapshot tag) --- .uberenv_config.json | 2 +- tpl/RAJA | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.uberenv_config.json b/.uberenv_config.json index e15410b55..a88765962 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "woptim/radiuss-packages-update", +"spack_commit": "d7f5dbaf8911387d6c38035f0d508702ee71b03a", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/tpl/RAJA b/tpl/RAJA index 0373c0225..ba0b90203 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 0373c02255f9a4ed2ad5bc25d97d6d05fb74fc5f +Subproject commit ba0b90203df2542437f11d80a83cc99bb0f8ac3f From b379712110c50c6fa48dfe4c2542316a6ba17b1a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Fri, 4 Oct 2024 16:04:16 +0200 Subject: [PATCH 18/49] Update RSC to main --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index ba0b90203..38f9f4215 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit ba0b90203df2542437f11d80a83cc99bb0f8ac3f +Subproject commit 38f9f4215fe572424a5a9932898dc74506c44a21 From 460a946aa5545066827e1dc23930ee94e166183f Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Fri, 4 Oct 2024 16:41:03 +0200 Subject: [PATCH 19/49] From RAJA: Update RAJA and Update RSC to main --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 38f9f4215..ab60a8d66 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 38f9f4215fe572424a5a9932898dc74506c44a21 +Subproject commit ab60a8d6654e5c0e5fc450aa43e93262e9802387 From d124c331f1237d42ef37bd0c04d90d70141a8689 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:36:11 +0200 Subject: [PATCH 20/49] Update Spack to appropriate snapshot branch --- .uberenv_config.json | 2 +- tpl/RAJA | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.uberenv_config.json b/.uberenv_config.json index a88765962..5c3fc32d8 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_commit": "d7f5dbaf8911387d6c38035f0d508702ee71b03a", +"spack_branch": "develop-2024-10-06", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/tpl/RAJA b/tpl/RAJA index ab60a8d66..6f9d25142 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit ab60a8d6654e5c0e5fc450aa43e93262e9802387 +Subproject commit 6f9d2514215156d80443be4ce8dbabc7fd2237bf From dcce16950cfc01e15ffd9a926407d7d1dcd22b64 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 11 Oct 2024 15:23:36 -0700 Subject: [PATCH 21/49] Update to RAJA develop and convert Seq and OMP reductions to new interface --- src/algorithm/REDUCE_SUM-OMP.cpp | 9 ++++++--- src/algorithm/REDUCE_SUM-Seq.cpp | 12 ++++++++---- src/basic/PI_REDUCE-OMP.cpp | 12 +++++++----- src/basic/PI_REDUCE-Seq.cpp | 10 +++++++--- src/basic/REDUCE3_INT-OMP.cpp | 13 +++++++++---- src/basic/REDUCE3_INT-Seq.cpp | 13 +++++++++---- src/basic/REDUCE_STRUCT-OMP.cpp | 16 +++++++++++----- src/basic/REDUCE_STRUCT-Seq.cpp | 18 ++++++++++++------ src/basic/TRAP_INT-OMP.cpp | 9 ++++++--- src/basic/TRAP_INT-Seq.cpp | 9 ++++++--- src/lcals/FIRST_MIN-OMP.cpp | 21 ++++++++++++--------- src/lcals/FIRST_MIN-Seq.cpp | 23 +++++++++++++---------- src/stream/DOT-OMP.cpp | 9 ++++++--- src/stream/DOT-Seq.cpp | 10 +++++++--- tpl/RAJA | 2 +- 15 files changed, 120 insertions(+), 66 deletions(-) diff --git a/src/algorithm/REDUCE_SUM-OMP.cpp b/src/algorithm/REDUCE_SUM-OMP.cpp index 1295887f5..6c9cd738e 100644 --- a/src/algorithm/REDUCE_SUM-OMP.cpp +++ b/src/algorithm/REDUCE_SUM-OMP.cpp @@ -76,6 +76,8 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,7 +85,7 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sum(m_sum_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_SUM_BODY; @@ -101,10 +103,11 @@ void REDUCE_SUM::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tsum = m_sum_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/algorithm/REDUCE_SUM-Seq.cpp b/src/algorithm/REDUCE_SUM-Seq.cpp index 8d4fdacb2..8b2006c13 100644 --- a/src/algorithm/REDUCE_SUM-Seq.cpp +++ b/src/algorithm/REDUCE_SUM-Seq.cpp @@ -76,6 +76,8 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,8 +85,8 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sum(m_sum_init); - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_SUM_BODY; }); @@ -100,9 +102,11 @@ void REDUCE_SUM::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tsum = m_sum_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-OMP.cpp b/src/basic/PI_REDUCE-OMP.cpp index 5c83aba6f..b31160f86 100644 --- a/src/basic/PI_REDUCE-OMP.cpp +++ b/src/basic/PI_REDUCE-OMP.cpp @@ -77,6 +77,8 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -84,9 +86,8 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum pi(m_pi_init); - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { PI_REDUCE_BODY; }); @@ -102,10 +103,11 @@ void PI_REDUCE::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tpi = m_pi_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/PI_REDUCE-Seq.cpp b/src/basic/PI_REDUCE-Seq.cpp index 4a5b28815..b139d5fc2 100644 --- a/src/basic/PI_REDUCE-Seq.cpp +++ b/src/basic/PI_REDUCE-Seq.cpp @@ -77,6 +77,8 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -84,7 +86,7 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum pi(m_pi_init); - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { PI_REDUCE_BODY; }); @@ -101,9 +103,11 @@ void PI_REDUCE::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tpi = m_pi_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-OMP.cpp b/src/basic/REDUCE3_INT-OMP.cpp index c9848ac98..fedbe96a3 100644 --- a/src/basic/REDUCE3_INT-OMP.cpp +++ b/src/basic/REDUCE3_INT-OMP.cpp @@ -91,6 +91,8 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -100,7 +102,7 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMin vmin(m_vmin_init); RAJA::ReduceMax vmax(m_vmax_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE3_INT_BODY_RAJA; }); @@ -121,13 +123,16 @@ void REDUCE3_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) Int_type tvmin = m_vmin_init; Int_type tvmax = m_vmax_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE3_INT-Seq.cpp b/src/basic/REDUCE3_INT-Seq.cpp index 32bcfbef6..68949e57f 100644 --- a/src/basic/REDUCE3_INT-Seq.cpp +++ b/src/basic/REDUCE3_INT-Seq.cpp @@ -87,6 +87,8 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -96,7 +98,7 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMin vmin(m_vmin_init); RAJA::ReduceMax vmax(m_vmax_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE3_INT_BODY_RAJA; }); @@ -117,13 +119,16 @@ void REDUCE3_INT::runSeqVariant(VariantID vid, size_t tune_idx) Int_type tvmin = m_vmin_init; Int_type tvmax = m_vmax_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMP.cpp b/src/basic/REDUCE_STRUCT-OMP.cpp index 8c44d02c0..5c3b78b63 100644 --- a/src/basic/REDUCE_STRUCT-OMP.cpp +++ b/src/basic/REDUCE_STRUCT-OMP.cpp @@ -110,6 +110,8 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -122,7 +124,7 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMax xmax(m_init_max); RAJA::ReduceMax ymax(m_init_max); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_STRUCT_BODY_RAJA; }); @@ -158,10 +160,14 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Seq.cpp b/src/basic/REDUCE_STRUCT-Seq.cpp index 1e2a68d43..06da6af92 100644 --- a/src/basic/REDUCE_STRUCT-Seq.cpp +++ b/src/basic/REDUCE_STRUCT-Seq.cpp @@ -100,6 +100,8 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -112,7 +114,7 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMax xmax(m_init_max); RAJA::ReduceMax ymax(m_init_max); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { REDUCE_STRUCT_BODY_RAJA; }); @@ -140,7 +142,7 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) Real_type txmax = m_init_max; Real_type tymax = m_init_max; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&txsum), RAJA::expt::Reduce(&tysum), @@ -148,10 +150,14 @@ void REDUCE_STRUCT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/TRAP_INT-OMP.cpp b/src/basic/TRAP_INT-OMP.cpp index f1961483a..5decf749f 100644 --- a/src/basic/TRAP_INT-OMP.cpp +++ b/src/basic/TRAP_INT-OMP.cpp @@ -79,6 +79,8 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -86,7 +88,7 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sumx(m_sumx_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { TRAP_INT_BODY; }); @@ -103,10 +105,11 @@ void TRAP_INT::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tsumx = m_sumx_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/basic/TRAP_INT-Seq.cpp b/src/basic/TRAP_INT-Seq.cpp index fa74efdcf..c998ebfa7 100644 --- a/src/basic/TRAP_INT-Seq.cpp +++ b/src/basic/TRAP_INT-Seq.cpp @@ -79,6 +79,8 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -86,7 +88,7 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum sumx(m_sumx_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { TRAP_INT_BODY; }); @@ -103,10 +105,11 @@ void TRAP_INT::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tsumx = m_sumx_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index a9a7f1ba1..0a90546ca 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -87,6 +87,8 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -95,7 +97,7 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceMinLoc loc( m_xmin_init, m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); @@ -107,22 +109,23 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) } else if (tune_idx == 1) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-Seq.cpp b/src/lcals/FIRST_MIN-Seq.cpp index a32ed4962..89bd3c4a0 100644 --- a/src/lcals/FIRST_MIN-Seq.cpp +++ b/src/lcals/FIRST_MIN-Seq.cpp @@ -79,15 +79,17 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + m_xmin_init, m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); @@ -99,22 +101,23 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) } else if (tune_idx == 1) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/stream/DOT-OMP.cpp b/src/stream/DOT-OMP.cpp index d7112336a..28ccdfc30 100644 --- a/src/stream/DOT-OMP.cpp +++ b/src/stream/DOT-OMP.cpp @@ -76,6 +76,8 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) case RAJA_OpenMP : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,7 +85,7 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum dot(m_dot_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { DOT_BODY; }); @@ -100,10 +102,11 @@ void DOT::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type tdot = m_dot_init; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/src/stream/DOT-Seq.cpp b/src/stream/DOT-Seq.cpp index 4d359775f..8c57ac02c 100644 --- a/src/stream/DOT-Seq.cpp +++ b/src/stream/DOT-Seq.cpp @@ -76,6 +76,8 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) case RAJA_Seq : { + RAJA::resources::Host res; + if (tune_idx == 0) { startTimer(); @@ -83,7 +85,7 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) RAJA::ReduceSum dot(m_dot_init); - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { DOT_BODY; }); @@ -100,9 +102,11 @@ void DOT::runSeqVariant(VariantID vid, size_t tune_idx) Real_type tdot = m_dot_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), + RAJA::forall(res, + RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); diff --git a/tpl/RAJA b/tpl/RAJA index 378199aac..a7aa1b4c3 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 378199aac342ee21c2ddfbcbb48413bd1dfac612 +Subproject commit a7aa1b4c387596c55ad6971b00f5de69a6d77b00 From 7505865282f07914cc22339d9f64d39c027d31ed Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:48:55 +0200 Subject: [PATCH 22/49] Update RAJA to merge of similar PR --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 6f9d25142..2e68fbb77 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 6f9d2514215156d80443be4ce8dbabc7fd2237bf +Subproject commit 2e68fbb77cdca67c03761bf2db5c8c7172fc01bf From 7696a9f7a4dad8ef3dcd9add320b5f8f55438133 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 15 Oct 2024 13:39:03 -0700 Subject: [PATCH 23/49] Convert CUDA variants to new reduction interface --- src/algorithm/REDUCE_SUM-Cuda.cpp | 3 ++- src/basic/PI_REDUCE-Cuda.cpp | 3 ++- src/basic/REDUCE3_INT-Cuda.cpp | 6 ++++-- src/basic/REDUCE_STRUCT-Cuda.cpp | 14 +++++++++----- src/basic/REDUCE_STRUCT-OMP.cpp | 2 +- src/basic/TRAP_INT-Cuda.cpp | 3 ++- src/lcals/FIRST_MIN-Cuda.cpp | 15 ++++++++------- src/stream/DOT-Cuda.cpp | 3 ++- 8 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/algorithm/REDUCE_SUM-Cuda.cpp b/src/algorithm/REDUCE_SUM-Cuda.cpp index 302ab35d6..836089ab6 100644 --- a/src/algorithm/REDUCE_SUM-Cuda.cpp +++ b/src/algorithm/REDUCE_SUM-Cuda.cpp @@ -239,7 +239,8 @@ void REDUCE_SUM::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] __device__ (Index_type i, Real_type& sum) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-Cuda.cpp b/src/basic/PI_REDUCE-Cuda.cpp index 8529897c3..449c0b634 100644 --- a/src/basic/PI_REDUCE-Cuda.cpp +++ b/src/basic/PI_REDUCE-Cuda.cpp @@ -168,7 +168,8 @@ void PI_REDUCE::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall< exec_policy >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] __device__ (Index_type i, Real_type& pi) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-Cuda.cpp b/src/basic/REDUCE3_INT-Cuda.cpp index a8d68b31c..cf7bb9716 100644 --- a/src/basic/REDUCE3_INT-Cuda.cpp +++ b/src/basic/REDUCE3_INT-Cuda.cpp @@ -194,8 +194,10 @@ void REDUCE3_INT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), [=] __device__ (Index_type i, - Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Cuda.cpp b/src/basic/REDUCE_STRUCT-Cuda.cpp index 898b453f0..adc3b4d8d 100644 --- a/src/basic/REDUCE_STRUCT-Cuda.cpp +++ b/src/basic/REDUCE_STRUCT-Cuda.cpp @@ -233,7 +233,7 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid) Real_type txmax = m_init_max; Real_type tymax = m_init_max; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&txsum), RAJA::expt::Reduce(&tysum), @@ -241,10 +241,14 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=] __device__ (Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMP.cpp b/src/basic/REDUCE_STRUCT-OMP.cpp index 5c3b78b63..c7ef77de8 100644 --- a/src/basic/REDUCE_STRUCT-OMP.cpp +++ b/src/basic/REDUCE_STRUCT-OMP.cpp @@ -152,7 +152,7 @@ void REDUCE_STRUCT::runOpenMPVariant(VariantID vid, size_t tune_idx) Real_type txmax = m_init_max; Real_type tymax = m_init_max; - RAJA::forall( + RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&txsum), RAJA::expt::Reduce(&tysum), diff --git a/src/basic/TRAP_INT-Cuda.cpp b/src/basic/TRAP_INT-Cuda.cpp index e58e86923..717fef6f5 100644 --- a/src/basic/TRAP_INT-Cuda.cpp +++ b/src/basic/TRAP_INT-Cuda.cpp @@ -175,7 +175,8 @@ void TRAP_INT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] __device__ (Index_type i, Real_type& sumx) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-Cuda.cpp b/src/lcals/FIRST_MIN-Cuda.cpp index 08f2ab240..11d11b46a 100644 --- a/src/lcals/FIRST_MIN-Cuda.cpp +++ b/src/lcals/FIRST_MIN-Cuda.cpp @@ -168,22 +168,23 @@ void FIRST_MIN::runCudaVariantRAJANewReduce(VariantID vid) if ( vid == RAJA_CUDA ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] __device__ (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/stream/DOT-Cuda.cpp b/src/stream/DOT-Cuda.cpp index 031355a3e..c8910ee8c 100644 --- a/src/stream/DOT-Cuda.cpp +++ b/src/stream/DOT-Cuda.cpp @@ -164,7 +164,8 @@ void DOT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] __device__ (Index_type i, Real_type& dot) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); From bd434a547550e31ce154fa0b53077e8db92c3e1b Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 15 Oct 2024 15:15:07 -0700 Subject: [PATCH 24/49] Convert OpenMP target variants of reduction kernels to new interface. Note: A RAJA fix is needed to have this compile. --- src/algorithm/ATOMIC.hpp | 1 + src/algorithm/REDUCE_SUM-OMPTarget.cpp | 3 ++- src/basic/PI_REDUCE-OMPTarget.cpp | 3 ++- src/basic/REDUCE3_INT-OMPTarget.cpp | 7 +++++-- src/basic/REDUCE_STRUCT-OMPTarget.cpp | 12 ++++++++---- src/basic/TRAP_INT-OMPTarget.cpp | 3 ++- src/lcals/FIRST_MIN-OMPTarget.cpp | 15 ++++++++------- src/stream/DOT-OMPTarget.cpp | 3 ++- 8 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/algorithm/ATOMIC.hpp b/src/algorithm/ATOMIC.hpp index 800d3ad92..68fa4e1ef 100644 --- a/src/algorithm/ATOMIC.hpp +++ b/src/algorithm/ATOMIC.hpp @@ -74,6 +74,7 @@ class ATOMIC : public KernelBase void setOpenMPTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); + void setOpenMPTargetTuningDefinitions(VariantID vid); template < size_t replication > void runSeqVariantReplicate(VariantID vid); diff --git a/src/algorithm/REDUCE_SUM-OMPTarget.cpp b/src/algorithm/REDUCE_SUM-OMPTarget.cpp index bac174094..3b7482156 100644 --- a/src/algorithm/REDUCE_SUM-OMPTarget.cpp +++ b/src/algorithm/REDUCE_SUM-OMPTarget.cpp @@ -64,7 +64,8 @@ void REDUCE_SUM::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_AR RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-OMPTarget.cpp b/src/basic/PI_REDUCE-OMPTarget.cpp index c74f3d551..efdef965c 100644 --- a/src/basic/PI_REDUCE-OMPTarget.cpp +++ b/src/basic/PI_REDUCE-OMPTarget.cpp @@ -64,7 +64,8 @@ void PI_REDUCE::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-OMPTarget.cpp b/src/basic/REDUCE3_INT-OMPTarget.cpp index 5cd18f176..4a1c08349 100644 --- a/src/basic/REDUCE3_INT-OMPTarget.cpp +++ b/src/basic/REDUCE3_INT-OMPTarget.cpp @@ -74,8 +74,11 @@ void REDUCE3_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_A RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=](Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-OMPTarget.cpp b/src/basic/REDUCE_STRUCT-OMPTarget.cpp index f8775bf71..543e314d8 100644 --- a/src/basic/REDUCE_STRUCT-OMPTarget.cpp +++ b/src/basic/REDUCE_STRUCT-OMPTarget.cpp @@ -101,10 +101,14 @@ void REDUCE_STRUCT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=](Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=](Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/TRAP_INT-OMPTarget.cpp b/src/basic/TRAP_INT-OMPTarget.cpp index 9fde43876..3a5d76306 100644 --- a/src/basic/TRAP_INT-OMPTarget.cpp +++ b/src/basic/TRAP_INT-OMPTarget.cpp @@ -71,7 +71,8 @@ void TRAP_INT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-OMPTarget.cpp b/src/lcals/FIRST_MIN-OMPTarget.cpp index 14991e1b7..906c73127 100644 --- a/src/lcals/FIRST_MIN-OMPTarget.cpp +++ b/src/lcals/FIRST_MIN-OMPTarget.cpp @@ -60,22 +60,23 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } else if ( vid == RAJA_OpenMPTarget ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall>( RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=](Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=](Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/stream/DOT-OMPTarget.cpp b/src/stream/DOT-OMPTarget.cpp index fd7d02a70..10a7bfea6 100644 --- a/src/stream/DOT-OMPTarget.cpp +++ b/src/stream/DOT-OMPTarget.cpp @@ -68,7 +68,8 @@ void DOT::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ RAJA::forall>( RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); From 9dfca7d7ef1e8d4f88f2c1dd92a8bf5af09f84a7 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 15 Oct 2024 15:48:00 -0700 Subject: [PATCH 25/49] Convert Sycl kernels with reductions to new val-loc interface --- src/algorithm/REDUCE_SUM-Sycl.cpp | 4 +++- src/basic/PI_REDUCE-Sycl.cpp | 3 ++- src/basic/REDUCE3_INT-Sycl.cpp | 7 +++++-- src/basic/TRAP_INT-Sycl.cpp | 3 ++- src/lcals/FIRST_MIN-Sycl.cpp | 15 ++++++++------- src/stream/DOT-Sycl.cpp | 4 +++- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/algorithm/REDUCE_SUM-Sycl.cpp b/src/algorithm/REDUCE_SUM-Sycl.cpp index 516048863..810a71bf2 100644 --- a/src/algorithm/REDUCE_SUM-Sycl.cpp +++ b/src/algorithm/REDUCE_SUM-Sycl.cpp @@ -76,11 +76,13 @@ void REDUCE_SUM::runSyclVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { Real_type tsum = m_sum_init; + RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] (Index_type i, Real_type& sum) { + [=] (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-Sycl.cpp b/src/basic/PI_REDUCE-Sycl.cpp index c95e29583..3f09ffdf7 100644 --- a/src/basic/PI_REDUCE-Sycl.cpp +++ b/src/basic/PI_REDUCE-Sycl.cpp @@ -87,7 +87,8 @@ void PI_REDUCE::runSyclVariantImpl(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] (Index_type i, Real_type& pi) { + [=] (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-Sycl.cpp b/src/basic/REDUCE3_INT-Sycl.cpp index 58ac6f082..dbf81acaa 100644 --- a/src/basic/REDUCE3_INT-Sycl.cpp +++ b/src/basic/REDUCE3_INT-Sycl.cpp @@ -110,8 +110,11 @@ void REDUCE3_INT::runSyclVariantImpl(VariantID vid) RAJA::expt::Reduce(&tvsum), RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), - [=] (Index_type i, Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + [=] (Index_type i, + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/TRAP_INT-Sycl.cpp b/src/basic/TRAP_INT-Sycl.cpp index a9795c77e..b1ce89d9b 100644 --- a/src/basic/TRAP_INT-Sycl.cpp +++ b/src/basic/TRAP_INT-Sycl.cpp @@ -85,7 +85,8 @@ void TRAP_INT::runSyclVariantImpl(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] (Index_type i, Real_type& sumx) { + [=] (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-Sycl.cpp b/src/lcals/FIRST_MIN-Sycl.cpp index 616c84dcb..6cd00ea38 100644 --- a/src/lcals/FIRST_MIN-Sycl.cpp +++ b/src/lcals/FIRST_MIN-Sycl.cpp @@ -84,23 +84,24 @@ void FIRST_MIN::runSyclVariantImpl(VariantID vid) } else if ( vid == RAJA_SYCL ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] (Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/stream/DOT-Sycl.cpp b/src/stream/DOT-Sycl.cpp index 250f0b680..4f3fb40f5 100644 --- a/src/stream/DOT-Sycl.cpp +++ b/src/stream/DOT-Sycl.cpp @@ -76,11 +76,13 @@ void DOT::runSyclVariantImpl(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { Real_type tdot = m_dot_init; + RAJA::forall< RAJA::sycl_exec >( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] (Index_type i, Real_type& dot) { + [=] (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); From e43bd0aa8a5b047b67c1778a459b8e357b204712 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 16 Oct 2024 09:14:15 -0700 Subject: [PATCH 26/49] Pull in latest changes in RAJA develop --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index a7aa1b4c3..8b3c04e3d 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit a7aa1b4c387596c55ad6971b00f5de69a6d77b00 +Subproject commit 8b3c04e3da0cf508d30c98dc03cb4751893195db From c045ae8de054e9e1cc54bc2e10d56509d4b7c445 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 16 Oct 2024 11:52:43 -0700 Subject: [PATCH 27/49] Update HIP kernels to val-op interface --- src/algorithm/REDUCE_SUM-Hip.cpp | 3 ++- src/basic/PI_REDUCE-Hip.cpp | 3 ++- src/basic/REDUCE3_INT-Hip.cpp | 6 ++++-- src/basic/REDUCE_STRUCT-Cuda.cpp | 10 +++++----- src/basic/REDUCE_STRUCT-Hip.cpp | 12 ++++++++---- src/basic/TRAP_INT-Hip.cpp | 3 ++- src/lcals/FIRST_MIN-Hip.cpp | 15 ++++++++------- src/stream/DOT-Hip.cpp | 3 ++- 8 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/algorithm/REDUCE_SUM-Hip.cpp b/src/algorithm/REDUCE_SUM-Hip.cpp index 831978015..f7c689593 100644 --- a/src/algorithm/REDUCE_SUM-Hip.cpp +++ b/src/algorithm/REDUCE_SUM-Hip.cpp @@ -266,7 +266,8 @@ void REDUCE_SUM::runHipVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsum), - [=] __device__ (Index_type i, Real_type& sum) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sum) { REDUCE_SUM_BODY; } ); diff --git a/src/basic/PI_REDUCE-Hip.cpp b/src/basic/PI_REDUCE-Hip.cpp index ed2dfd8dd..2db8c8c98 100644 --- a/src/basic/PI_REDUCE-Hip.cpp +++ b/src/basic/PI_REDUCE-Hip.cpp @@ -168,7 +168,8 @@ void PI_REDUCE::runHipVariantRAJANewReduce(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tpi), - [=] __device__ (Index_type i, Real_type& pi) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& pi) { PI_REDUCE_BODY; } ); diff --git a/src/basic/REDUCE3_INT-Hip.cpp b/src/basic/REDUCE3_INT-Hip.cpp index 12d172de7..f28aecc5b 100644 --- a/src/basic/REDUCE3_INT-Hip.cpp +++ b/src/basic/REDUCE3_INT-Hip.cpp @@ -194,8 +194,10 @@ void REDUCE3_INT::runHipVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tvmin), RAJA::expt::Reduce(&tvmax), [=] __device__ (Index_type i, - Int_type& vsum, Int_type& vmin, Int_type& vmax) { - REDUCE3_INT_BODY; + RAJA::expt::ValOp& vsum, + RAJA::expt::ValOp& vmin, + RAJA::expt::ValOp& vmax) { + REDUCE3_INT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Cuda.cpp b/src/basic/REDUCE_STRUCT-Cuda.cpp index adc3b4d8d..2ac752316 100644 --- a/src/basic/REDUCE_STRUCT-Cuda.cpp +++ b/src/basic/REDUCE_STRUCT-Cuda.cpp @@ -243,11 +243,11 @@ void REDUCE_STRUCT::runCudaVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymax), [=] __device__ (Index_type i, RAJA::expt::ValOp& xsum, - RAJA::expt::ValOp& ysum, - RAJA::expt::ValOp& xmin, - RAJA::expt::ValOp& ymin, - RAJA::expt::ValOp& xmax, - RAJA::expt::ValOp& ymax ) { + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/REDUCE_STRUCT-Hip.cpp b/src/basic/REDUCE_STRUCT-Hip.cpp index 17fe5ad83..cac5a2989 100644 --- a/src/basic/REDUCE_STRUCT-Hip.cpp +++ b/src/basic/REDUCE_STRUCT-Hip.cpp @@ -241,10 +241,14 @@ void REDUCE_STRUCT::runHipVariantRAJANewReduce(VariantID vid) RAJA::expt::Reduce(&tymin), RAJA::expt::Reduce(&txmax), RAJA::expt::Reduce(&tymax), - [=] __device__ (Index_type i, Real_type& xsum, Real_type& ysum, - Real_type& xmin, Real_type& ymin, - Real_type& xmax, Real_type& ymax) { - REDUCE_STRUCT_BODY; + [=] __device__ (Index_type i, + RAJA::expt::ValOp& xsum, + RAJA::expt::ValOp& ysum, + RAJA::expt::ValOp& xmin, + RAJA::expt::ValOp& ymin, + RAJA::expt::ValOp& xmax, + RAJA::expt::ValOp& ymax ) { + REDUCE_STRUCT_BODY_RAJA; } ); diff --git a/src/basic/TRAP_INT-Hip.cpp b/src/basic/TRAP_INT-Hip.cpp index e60b3ccff..de3140258 100644 --- a/src/basic/TRAP_INT-Hip.cpp +++ b/src/basic/TRAP_INT-Hip.cpp @@ -176,7 +176,8 @@ void TRAP_INT::runHipVariantRAJANewReduce(VariantID vid) res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tsumx), - [=] __device__ (Index_type i, Real_type& sumx) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& sumx) { TRAP_INT_BODY; } ); diff --git a/src/lcals/FIRST_MIN-Hip.cpp b/src/lcals/FIRST_MIN-Hip.cpp index 3c6fd7b35..b602b4fca 100644 --- a/src/lcals/FIRST_MIN-Hip.cpp +++ b/src/lcals/FIRST_MIN-Hip.cpp @@ -168,22 +168,23 @@ void FIRST_MIN::runHipVariantRAJANewReduce(VariantID vid) if ( vid == RAJA_HIP ) { - using VL_TYPE = RAJA::expt::ValLoc; - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - VL_TYPE tloc(m_xmin_init, m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tloc), - [=] __device__ (Index_type i, VL_TYPE& loc) { - loc.min(x[i], i); + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + minloc.minloc(x[i], i); } ); - m_minloc = static_cast(tloc.getLoc()); + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/stream/DOT-Hip.cpp b/src/stream/DOT-Hip.cpp index 0c3c914a9..24984f300 100644 --- a/src/stream/DOT-Hip.cpp +++ b/src/stream/DOT-Hip.cpp @@ -164,7 +164,8 @@ void DOT::runHipVariantRAJANewReduce(VariantID vid) RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), RAJA::expt::Reduce(&tdot), - [=] __device__ (Index_type i, Real_type& dot) { + [=] __device__ (Index_type i, + RAJA::expt::ValOp& dot) { DOT_BODY; } ); From 353e552c5328aa2e8e3ce66ca9e99e77b56fb23c Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 16 Oct 2024 13:03:03 -0700 Subject: [PATCH 28/49] Update CI to essentially match RAJA --- .github/workflows/build.yml | 2 +- .gitlab/custom-jobs-and-variables.yml | 14 ++++++------ .gitlab/jobs/corona.yml | 8 ++++--- .gitlab/jobs/lassen.yml | 28 ++++------------------- .gitlab/jobs/poodle.yml | 13 ++++------- .gitlab/jobs/ruby.yml | 14 ++++-------- .gitlab/jobs/tioga.yml | 19 +++++++++++----- .gitlab/subscribed-pipelines.yml | 2 -- .uberenv_config.json | 2 +- Dockerfile | 32 +++++---------------------- 10 files changed, 46 insertions(+), 88 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 48bb3df41..61703c49d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ jobs: build_docker: strategy: matrix: - target: [gcc12, gcc13, clang13, clang15, rocm5.6, rocm5.6_desul, intel2024, intel2024_debug, intel2024_sycl] + target: [gcc12, gcc13, clang13, clang15, rocm6, rocm6_desul, intel2024, intel2024_debug, intel2024_sycl] runs-on: ubuntu-latest steps: - run: | diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index 063cabb90..99d2ba503 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -22,7 +22,7 @@ variables: # Project specific variants for ruby PROJECT_RUBY_VARIANTS: "~shared +openmp" # Project specific deps for ruby - PROJECT_RUBY_DEPS: "^blt@develop " + PROJECT_RUBY_DEPS: # Poodle # Arguments for top level allocation @@ -32,27 +32,27 @@ variables: # Project specific variants for poodle PROJECT_POODLE_VARIANTS: "~shared +openmp" # Project specific deps for poodle - PROJECT_POODLE_DEPS: "^blt@develop " + PROJECT_POODLE_DEPS: # Corona # Arguments for top level allocation - CORONA_SHARED_ALLOC: "--exclusive --time-limit=12m --nodes=1 -o per-resource.count=2" + CORONA_SHARED_ALLOC: "--exclusive --time-limit=20m --nodes=1 -o per-resource.count=2" # Arguments for job level allocation CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" # Project specific variants for corona PROJECT_CORONA_VARIANTS: "~shared ~openmp" # Project specific deps for corona - PROJECT_CORONA_DEPS: "^blt@develop " + PROJECT_CORONA_DEPS: # Tioga # Arguments for top level allocation - TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=26m --nodes=1 -o per-resource.count=2" + TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=30m --nodes=1 -o per-resource.count=2" # Arguments for job level allocation TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" # Project specific variants for tioga PROJECT_TIOGA_VARIANTS: "~shared +openmp" # Project specific deps for tioga - PROJECT_TIOGA_DEPS: "^blt@develop " + PROJECT_TIOGA_DEPS: # Lassen and Butte use a different job scheduler (spectrum lsf) that does not # allow pre-allocation the same way slurm does. @@ -61,7 +61,7 @@ variables: # Project specific variants for lassen PROJECT_LASSEN_VARIANTS: "~shared +openmp cuda_arch=70" # Project specific deps for lassen - PROJECT_LASSEN_DEPS: "^blt@develop " + PROJECT_LASSEN_DEPS: # Configuration shared by build and test jobs specific to this project. # Not all configuration can be shared. Here projects can fine tune the diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 9af5ba72b..dd33c062f 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -27,6 +27,8 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -# With GitLab CI, included files cannot be empty. -variables: - INCLUDED_FILE_CANNOT_BE_EMPTY: "True" +clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: + variables: + SPEC: " ~shared +sycl ~openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\"" + MODULE_LIST: "rocm/5.7.1" + extends: .job_on_corona diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 1b9bc0eda..699be159b 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -18,14 +18,7 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# Overriding shared spec: Longer allocation + extra flags -xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: - variables: - SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" - MODULE_LIST: "cuda/11.2.0" - LASSEN_JOB_ALLOC: "1 -W 60 -q pci" - extends: .job_on_lassen - +# No overriden jobs so far. ############ # Extra jobs @@ -39,18 +32,11 @@ gcc_8_3_1: SPEC: " ~shared +openmp %gcc@=8.3.1 ${PROJECT_LASSEN_DEPS}" extends: .job_on_lassen -gcc_8_3_1_cuda_11_5_0_ats_disabled: - extends: .job_on_lassen - variables: - SPEC: " ~shared +openmp +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^blt@develop" - MODULE_LIST: "cuda/11.5.0" - LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" - -gcc_8_3_1_cuda_11_5_0_ats_disabled_mpi: +gcc_8_3_1_cuda_11_7_0_ats_disabled: extends: .job_on_lassen variables: - SPEC: " ~shared +openmp +cuda +mpi %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^spectrum-mpi ^blt@develop" - MODULE_LIST: "cuda/11.5.0" + SPEC: " ~shared +openmp +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" + MODULE_LIST: "cuda/11.7.0" LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" ########## @@ -68,9 +54,3 @@ clang_13_0_1_libcpp: # ASAN_OPTIONS: "detect_leaks=1" # LSAN_OPTIONS: "suppressions=${CI_PROJECT_DIR}/tpl/RAJA/suppressions.asan" # extends: .job_on_lassen - -# Activated in RAJA, but we don't use desul atomics here -#gcc_8_3_1_cuda_10_1_168_desul_atomics: -# variables: -# SPEC: "+openmp +cuda +desul %gcc@=8.3.1 cuda_arch=70 cuda_arch=70 ^cuda@10.1.243+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" -# extends: .job_on_lassen diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 8e86158f0..7a4dd556b 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -28,14 +28,9 @@ gcc_10_3_1: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -intel_19_1_2_gcc_10_3_1: +intel_2023_2_1: variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_POODLE_DEPS}" - extends: .job_on_poodle - -intel_2022_1_0: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" allow_failure: true extends: .job_on_poodle @@ -46,8 +41,8 @@ intel_2022_1_0: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2022_1_0_mpi: +intel_2023_2_1_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" + SPEC: "+openmp +mpi %intel@=2023.2.0 ^mvapich2" allow_failure: true extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index c19e36d12..666f60a70 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -29,15 +29,9 @@ gcc_10_3_1: RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .job_on_ruby -intel_19_1_2_gcc_10_3_1: +intel_2023_2_1: variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_RUBY_DEPS}" - RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" - extends: .job_on_ruby - -intel_2022_1_0: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby ############ @@ -47,7 +41,7 @@ intel_2022_1_0: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2022_1_0_mpi: +intel_2023_2_1_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" + SPEC: "~shared +openmp +mpi %intel@=2023.2.1 ^mvapich2" extends: .job_on_ruby diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 00ed3c276..234c83718 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -18,7 +18,11 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# No overridden jobs so far. +cce_18_0_0: + variables: + SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=18.0.0 ${PROJECT_TIOGA_DEPS}" + extends: .job_on_tioga +# allow_failure: true ############ # Extra jobs @@ -27,13 +31,18 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -rocmcc_6_1_1_hip_openmp: +cce_17_0_1: + variables: + SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=17.0.1 ${PROJECT_TIOGA_DEPS}" + extends: .job_on_tioga + +rocmcc_6_2_0_hip_openmp: variables: - SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0" extends: .job_on_tioga -rocmcc_6_1_1_hip_openmp_mpi: +rocmcc_6_2_0_hip_openmp_mpi: variables: - SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" + SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0" extends: .job_on_tioga allow_failure: true diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml index 7e60a05e9..2f0a610df 100644 --- a/.gitlab/subscribed-pipelines.yml +++ b/.gitlab/subscribed-pipelines.yml @@ -113,5 +113,3 @@ lassen-build-and-test: CI_MACHINE: "lassen" needs: [lassen-up-check, generate-job-lists] extends: [.build-and-test] - - diff --git a/.uberenv_config.json b/.uberenv_config.json index fda595d3a..abc8c9e29 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "develop-2024-05-26", +"spack_branch": "develop-2024-10-06", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/Dockerfile b/Dockerfile index 9623e78c2..8c49c2cbf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -114,41 +114,21 @@ RUN /bin/bash -c "source /opt/intel/oneapi/setvars.sh 2>&1 > /dev/null && \ ## Need to find a viable cuda image to test... ## -# TODO: We should switch to ROCm 6 -- where to get an image?? -FROM ghcr.io/llnl/radiuss:ubuntu-20.04-hip-5.6.1 AS rocm5.6 +FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6 ENV GTEST_COLOR=1 ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-5.6.1/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 6 +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.0.2 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ + make -j 16 -# TODO: We should switch to ROCm 6 -- where to get an image?? -FROM ghcr.io/llnl/radiuss:ubuntu-20.04-hip-5.6.1 AS rocm5.6_desul +FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6_desul ENV GTEST_COLOR=1 ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-5.6.1/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 6 - -## ROCm 6 image is broken -FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6.0 -ENV GTEST_COLOR=1 -ENV HCC_AMDGPU_TARGET=gfx900 -COPY . /home/raja/workspace -WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 6 - -## ROCm 6 image is broken -FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6.0_desul -ENV GTEST_COLOR=1 -ENV HCC_AMDGPU_TARGET=gfx900 -COPY . /home/raja/workspace -WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 6 +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.0.2 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ + make -j 16 FROM ghcr.io/llnl/radiuss:intel-2024.0-ubuntu-20.04 AS intel2024_sycl ENV GTEST_COLOR=1 From 161da8f94cd300841bf2f693dba7e85771aed302 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 16 Oct 2024 13:59:41 -0700 Subject: [PATCH 29/49] Revert "Update CI to essentially match RAJA" This reverts commit 353e552c5328aa2e8e3ce66ca9e99e77b56fb23c. --- .github/workflows/build.yml | 2 +- .gitlab/custom-jobs-and-variables.yml | 14 ++++++------ .gitlab/jobs/corona.yml | 8 +++---- .gitlab/jobs/lassen.yml | 28 +++++++++++++++++++---- .gitlab/jobs/poodle.yml | 13 +++++++---- .gitlab/jobs/ruby.yml | 14 ++++++++---- .gitlab/jobs/tioga.yml | 19 +++++----------- .gitlab/subscribed-pipelines.yml | 2 ++ .uberenv_config.json | 2 +- Dockerfile | 32 ++++++++++++++++++++++----- 10 files changed, 88 insertions(+), 46 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 61703c49d..48bb3df41 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ jobs: build_docker: strategy: matrix: - target: [gcc12, gcc13, clang13, clang15, rocm6, rocm6_desul, intel2024, intel2024_debug, intel2024_sycl] + target: [gcc12, gcc13, clang13, clang15, rocm5.6, rocm5.6_desul, intel2024, intel2024_debug, intel2024_sycl] runs-on: ubuntu-latest steps: - run: | diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index 99d2ba503..063cabb90 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -22,7 +22,7 @@ variables: # Project specific variants for ruby PROJECT_RUBY_VARIANTS: "~shared +openmp" # Project specific deps for ruby - PROJECT_RUBY_DEPS: + PROJECT_RUBY_DEPS: "^blt@develop " # Poodle # Arguments for top level allocation @@ -32,27 +32,27 @@ variables: # Project specific variants for poodle PROJECT_POODLE_VARIANTS: "~shared +openmp" # Project specific deps for poodle - PROJECT_POODLE_DEPS: + PROJECT_POODLE_DEPS: "^blt@develop " # Corona # Arguments for top level allocation - CORONA_SHARED_ALLOC: "--exclusive --time-limit=20m --nodes=1 -o per-resource.count=2" + CORONA_SHARED_ALLOC: "--exclusive --time-limit=12m --nodes=1 -o per-resource.count=2" # Arguments for job level allocation CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" # Project specific variants for corona PROJECT_CORONA_VARIANTS: "~shared ~openmp" # Project specific deps for corona - PROJECT_CORONA_DEPS: + PROJECT_CORONA_DEPS: "^blt@develop " # Tioga # Arguments for top level allocation - TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=30m --nodes=1 -o per-resource.count=2" + TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=26m --nodes=1 -o per-resource.count=2" # Arguments for job level allocation TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" # Project specific variants for tioga PROJECT_TIOGA_VARIANTS: "~shared +openmp" # Project specific deps for tioga - PROJECT_TIOGA_DEPS: + PROJECT_TIOGA_DEPS: "^blt@develop " # Lassen and Butte use a different job scheduler (spectrum lsf) that does not # allow pre-allocation the same way slurm does. @@ -61,7 +61,7 @@ variables: # Project specific variants for lassen PROJECT_LASSEN_VARIANTS: "~shared +openmp cuda_arch=70" # Project specific deps for lassen - PROJECT_LASSEN_DEPS: + PROJECT_LASSEN_DEPS: "^blt@develop " # Configuration shared by build and test jobs specific to this project. # Not all configuration can be shared. Here projects can fine tune the diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index dd33c062f..9af5ba72b 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -27,8 +27,6 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: - variables: - SPEC: " ~shared +sycl ~openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\"" - MODULE_LIST: "rocm/5.7.1" - extends: .job_on_corona +# With GitLab CI, included files cannot be empty. +variables: + INCLUDED_FILE_CANNOT_BE_EMPTY: "True" diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml index 699be159b..1b9bc0eda 100644 --- a/.gitlab/jobs/lassen.yml +++ b/.gitlab/jobs/lassen.yml @@ -18,7 +18,14 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# No overriden jobs so far. +# Overriding shared spec: Longer allocation + extra flags +xl_2022_08_19_gcc_8_3_1_cuda_11_2_0: + variables: + SPEC: "${PROJECT_LASSEN_VARIANTS} +cuda cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" %xl@=16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" + MODULE_LIST: "cuda/11.2.0" + LASSEN_JOB_ALLOC: "1 -W 60 -q pci" + extends: .job_on_lassen + ############ # Extra jobs @@ -32,11 +39,18 @@ gcc_8_3_1: SPEC: " ~shared +openmp %gcc@=8.3.1 ${PROJECT_LASSEN_DEPS}" extends: .job_on_lassen -gcc_8_3_1_cuda_11_7_0_ats_disabled: +gcc_8_3_1_cuda_11_5_0_ats_disabled: + extends: .job_on_lassen + variables: + SPEC: " ~shared +openmp +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^blt@develop" + MODULE_LIST: "cuda/11.5.0" + LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" + +gcc_8_3_1_cuda_11_5_0_ats_disabled_mpi: extends: .job_on_lassen variables: - SPEC: " ~shared +openmp +cuda %gcc@=8.3.1 cuda_arch=70 ^cuda@11.7.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" - MODULE_LIST: "cuda/11.7.0" + SPEC: " ~shared +openmp +cuda +mpi %gcc@=8.3.1 cuda_arch=70 ^cuda@11.5.0+allow-unsupported-compilers ^spectrum-mpi ^blt@develop" + MODULE_LIST: "cuda/11.5.0" LASSEN_JOB_ALLOC: "1 --atsdisable -W 30 -q pci" ########## @@ -54,3 +68,9 @@ clang_13_0_1_libcpp: # ASAN_OPTIONS: "detect_leaks=1" # LSAN_OPTIONS: "suppressions=${CI_PROJECT_DIR}/tpl/RAJA/suppressions.asan" # extends: .job_on_lassen + +# Activated in RAJA, but we don't use desul atomics here +#gcc_8_3_1_cuda_10_1_168_desul_atomics: +# variables: +# SPEC: "+openmp +cuda +desul %gcc@=8.3.1 cuda_arch=70 cuda_arch=70 ^cuda@10.1.243+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}" +# extends: .job_on_lassen diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 7a4dd556b..8e86158f0 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -28,9 +28,14 @@ gcc_10_3_1: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -intel_2023_2_1: +intel_19_1_2_gcc_10_3_1: variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_POODLE_DEPS}" + extends: .job_on_poodle + +intel_2022_1_0: + variables: + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" allow_failure: true extends: .job_on_poodle @@ -41,8 +46,8 @@ intel_2023_2_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2023_2_1_mpi: +intel_2022_1_0_mpi: variables: - SPEC: "+openmp +mpi %intel@=2023.2.0 ^mvapich2" + SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" allow_failure: true extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 666f60a70..c19e36d12 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -29,9 +29,15 @@ gcc_10_3_1: RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .job_on_ruby -intel_2023_2_1: +intel_19_1_2_gcc_10_3_1: variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=19.1.2.gcc.10.3.1 ${PROJECT_RUBY_DEPS}" + RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" + extends: .job_on_ruby + +intel_2022_1_0: + variables: + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby ############ @@ -41,7 +47,7 @@ intel_2023_2_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2023_2_1_mpi: +intel_2022_1_0_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2023.2.1 ^mvapich2" + SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" extends: .job_on_ruby diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 234c83718..00ed3c276 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -18,11 +18,7 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -cce_18_0_0: - variables: - SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=18.0.0 ${PROJECT_TIOGA_DEPS}" - extends: .job_on_tioga -# allow_failure: true +# No overridden jobs so far. ############ # Extra jobs @@ -31,18 +27,13 @@ cce_18_0_0: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -cce_17_0_1: - variables: - SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=17.0.1 ${PROJECT_TIOGA_DEPS}" - extends: .job_on_tioga - -rocmcc_6_2_0_hip_openmp: +rocmcc_6_1_1_hip_openmp: variables: - SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0" + SPEC: "~shared +rocm +openmp amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" extends: .job_on_tioga -rocmcc_6_2_0_hip_openmp_mpi: +rocmcc_6_1_1_hip_openmp_mpi: variables: - SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0" + SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.1.1 ^hip@6.1.1 ^blt@develop" extends: .job_on_tioga allow_failure: true diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml index 2f0a610df..7e60a05e9 100644 --- a/.gitlab/subscribed-pipelines.yml +++ b/.gitlab/subscribed-pipelines.yml @@ -113,3 +113,5 @@ lassen-build-and-test: CI_MACHINE: "lassen" needs: [lassen-up-check, generate-job-lists] extends: [.build-and-test] + + diff --git a/.uberenv_config.json b/.uberenv_config.json index abc8c9e29..fda595d3a 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,7 +4,7 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_branch": "develop-2024-10-06", +"spack_branch": "develop-2024-05-26", "spack_activate" : {}, "spack_configs_path": "tpl/RAJA/scripts/radiuss-spack-configs", "spack_packages_path": "tpl/RAJA/scripts/radiuss-spack-configs/packages", diff --git a/Dockerfile b/Dockerfile index 8c49c2cbf..9623e78c2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -114,21 +114,41 @@ RUN /bin/bash -c "source /opt/intel/oneapi/setvars.sh 2>&1 > /dev/null && \ ## Need to find a viable cuda image to test... ## -FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6 +# TODO: We should switch to ROCm 6 -- where to get an image?? +FROM ghcr.io/llnl/radiuss:ubuntu-20.04-hip-5.6.1 AS rocm5.6 ENV GTEST_COLOR=1 ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.0.2 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 16 +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-5.6.1/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ + make -j 6 -FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6_desul +# TODO: We should switch to ROCm 6 -- where to get an image?? +FROM ghcr.io/llnl/radiuss:ubuntu-20.04-hip-5.6.1 AS rocm5.6_desul ENV GTEST_COLOR=1 ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.0.2 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 16 +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-5.6.1/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ + make -j 6 + +## ROCm 6 image is broken +FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6.0 +ENV GTEST_COLOR=1 +ENV HCC_AMDGPU_TARGET=gfx900 +COPY . /home/raja/workspace +WORKDIR /home/raja/workspace/build +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ + make -j 6 + +## ROCm 6 image is broken +FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6.0_desul +ENV GTEST_COLOR=1 +ENV HCC_AMDGPU_TARGET=gfx900 +COPY . /home/raja/workspace +WORKDIR /home/raja/workspace/build +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ + make -j 6 FROM ghcr.io/llnl/radiuss:intel-2024.0-ubuntu-20.04 AS intel2024_sycl ENV GTEST_COLOR=1 From 86784f1d699c8eb59890829c7fff7d755c339cb7 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 16 Oct 2024 23:15:16 +0200 Subject: [PATCH 30/49] Update to raja@develop --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 2e68fbb77..8b3c04e3d 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 2e68fbb77cdca67c03761bf2db5c8c7172fc01bf +Subproject commit 8b3c04e3da0cf508d30c98dc03cb4751893195db From 8b6f167efcc001413b46e5d19b0d2cd9c16a265a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Wed, 16 Oct 2024 23:55:47 +0200 Subject: [PATCH 31/49] Allow failure for intel 2023 and remove superfluous job --- .gitlab/jobs/poodle.yml | 16 +++++++++------- .gitlab/jobs/ruby.yml | 14 +++++++++----- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 7e23bba18..56709a184 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -18,22 +18,25 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. +# allow failure +intel_2023_2_1: + variables: + SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + extends: .job_on_poodle + allow_failure: true + +# omptask variant clang_14_0_6: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %clang@=14.0.6 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle +# omptask variant gcc_10_3_1: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle -intel_2022_1_0: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2022.1.0 ${PROJECT_POODLE_DEPS}" - allow_failure: true - extends: .job_on_poodle - ############ # Extra jobs ############ @@ -44,5 +47,4 @@ intel_2022_1_0: intel_2022_1_0_mpi: variables: SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" - allow_failure: true extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 376604c15..f3c448001 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -18,22 +18,26 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. +# allow failure +intel_2023_2_1: + variables: + SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + extends: .job_on_ruby + allow_failure: true + +# omptask variant clang_14_0_6: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +omptask %clang@=14.0.6 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby +# omptask variant gcc_10_3_1: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +omptask %gcc@=10.3.1 ${PROJECT_RUBY_DEPS}" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .job_on_ruby -intel_2022_1_0: - variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2022.1.0 ${PROJECT_RUBY_DEPS}" - extends: .job_on_ruby - ############ # Extra jobs ############ From ad7fd42b7f5f9251ccc15d5a0e18400218082a91 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:15:59 +0200 Subject: [PATCH 32/49] Update RAJA with "fix" for intel 2023 --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 8b3c04e3d..ee749f978 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 8b3c04e3da0cf508d30c98dc03cb4751893195db +Subproject commit ee749f97819aff5e4902ac19da2aab1fe920c7f6 From 8c6849f80ac127ce03afc32461bd8f64fcd4eb0a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:19:36 +0200 Subject: [PATCH 33/49] use -01 and fp-precise with intel 2023 --- .gitlab/jobs/poodle.yml | 4 ++-- .gitlab/jobs/ruby.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 56709a184..652f70ed3 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -18,10 +18,10 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# allow failure +# custom variant intel_2023_2_1: variables: - SPEC: "${PROJECT_POODLE_VARIANTS} %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" + SPEC: "${PROJECT_POODLE_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle allow_failure: true diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index f3c448001..8986ef272 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -18,10 +18,10 @@ # We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} So that # the comparison with the original job is easier. -# allow failure +# custom variant intel_2023_2_1: variables: - SPEC: "${PROJECT_RUBY_VARIANTS} %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" + SPEC: "${PROJECT_RUBY_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby allow_failure: true From f1b7375df27a5b497b3a2039f66d8acf5769c619 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Mon, 21 Oct 2024 14:33:13 +0200 Subject: [PATCH 34/49] From RAJA: Point at new RSC main commit --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index ee749f978..de6a7831c 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit ee749f97819aff5e4902ac19da2aab1fe920c7f6 +Subproject commit de6a7831c91b21ac024be7bf7d2a870aa0358596 From abfa26e1a55bab4ffa4d6ed8095d17a516d308c5 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Mon, 21 Oct 2024 17:27:22 +0200 Subject: [PATCH 35/49] Do not allow failure with intel 2023, update RAJA --- .gitlab/jobs/poodle.yml | 1 - .gitlab/jobs/ruby.yml | 1 - tpl/RAJA | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index 652f70ed3..df56107a6 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -23,7 +23,6 @@ intel_2023_2_1: variables: SPEC: "${PROJECT_POODLE_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_POODLE_DEPS}" extends: .job_on_poodle - allow_failure: true # omptask variant clang_14_0_6: diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 8986ef272..1beb5f893 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -23,7 +23,6 @@ intel_2023_2_1: variables: SPEC: "${PROJECT_RUBY_VARIANTS} +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ${PROJECT_RUBY_DEPS}" extends: .job_on_ruby - allow_failure: true # omptask variant clang_14_0_6: diff --git a/tpl/RAJA b/tpl/RAJA index de6a7831c..9a936d3c3 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit de6a7831c91b21ac024be7bf7d2a870aa0358596 +Subproject commit 9a936d3c39aa0a901ef54451f0b61871a371e776 From 46d07d9fffd2e8a7661877034d4a38ca14369188 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Tue, 22 Oct 2024 21:37:49 +0200 Subject: [PATCH 36/49] Update RAJA to new develop commit --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 9a936d3c3..573c40786 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 9a936d3c39aa0a901ef54451f0b61871a371e776 +Subproject commit 573c40786f09b8fcaa1746ae8cd910a4dfdef1ea From 67462eeb5065ffac77232a18a624aa16f9a2bfe2 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 23 Oct 2024 08:40:56 -0700 Subject: [PATCH 37/49] Update to RAJA develop --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 9a936d3c3..573c40786 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 9a936d3c39aa0a901ef54451f0b61871a371e776 +Subproject commit 573c40786f09b8fcaa1746ae8cd910a4dfdef1ea From c6abab105c7b877cc1a41f6aa76b3ebd74f56ead Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 23 Oct 2024 15:00:08 -0700 Subject: [PATCH 38/49] Try to get more info about corona SYCL job --- .gitlab/jobs/corona.yml | 2 +- .gitlab/subscribed-pipelines.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index c1fdc1c53..0e3ff0949 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -29,7 +29,7 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: variables: - SPEC: " ~shared +sycl ~openmp %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + SPEC: " ~shared +sycl ~openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona allow_failure: true diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml index 7e60a05e9..2f0a610df 100644 --- a/.gitlab/subscribed-pipelines.yml +++ b/.gitlab/subscribed-pipelines.yml @@ -113,5 +113,3 @@ lassen-build-and-test: CI_MACHINE: "lassen" needs: [lassen-up-check, generate-job-lists] extends: [.build-and-test] - - From 4bb3f45f09da21647c87519fda5dc6886cf8fc11 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:47:12 +0200 Subject: [PATCH 39/49] Fix test variant syntax --- .gitlab/jobs/corona.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 0e3ff0949..4fec5964d 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -29,7 +29,7 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: variables: - SPEC: " ~shared +sycl ~openmp +tests %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" + SPEC: " ~shared +sycl ~openmp tests=basic %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona allow_failure: true From ab7d71b73abb1c81171dbd1e04f7150bd47dd651 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 24 Oct 2024 10:54:12 -0700 Subject: [PATCH 40/49] Pull in RAJA branch with radiuss-spack-configs branch that should fix sycl test --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 573c40786..4bcc2e3de 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 573c40786f09b8fcaa1746ae8cd910a4dfdef1ea +Subproject commit 4bcc2e3de9a70011ce244a7559b6e41a0014ace3 From bb596d543305843e748b7f0e9dfdb7a22a308983 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 24 Oct 2024 11:35:22 -0700 Subject: [PATCH 41/49] SYCL CI fix attempt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 978c7ccf5..30789271f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ endif() option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off) -if (ENABLE_KOKKOS OR ENABLE_SYCL) +if (ENABLE_KOKKOS OR RAJA_ENABLE_SYCL) set(CMAKE_CXX_STANDARD 17) set(BLT_CXX_STD c++17) else() From 28e585aad01dde43eb15935859895cb25d39e4d2 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Thu, 24 Oct 2024 21:23:02 +0200 Subject: [PATCH 42/49] Apply changes to second occurence of ENABLE_SYCL --- CMakeLists.txt | 2 +- tpl/RAJA | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 30789271f..13da85d30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,7 +155,7 @@ endif() if (ENABLE_CUDA) list(APPEND RAJA_PERFSUITE_DEPENDS cuda) endif() -if (ENABLE_SYCL) +if (RAJA_ENABLE_SYCL) list(APPEND RAJA_PERFSUITE_DEPENDS sycl) endif() diff --git a/tpl/RAJA b/tpl/RAJA index 4bcc2e3de..573c40786 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 4bcc2e3de9a70011ce244a7559b6e41a0014ace3 +Subproject commit 573c40786f09b8fcaa1746ae8cd910a4dfdef1ea From afddcb8f559f93fafff9e21c27475800bf177854 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 25 Oct 2024 08:15:02 -0700 Subject: [PATCH 43/49] Pull in latest RAJA and don't allow jobs to fail --- .gitlab/jobs/corona.yml | 2 +- .gitlab/jobs/tioga.yml | 2 +- tpl/RAJA | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 4fec5964d..edd2c23d7 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -32,4 +32,4 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: SPEC: " ~shared +sycl ~openmp tests=basic %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona - allow_failure: true +##allow_failure: true diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 36db68790..567737a6e 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -36,4 +36,4 @@ rocmcc_6_2_0_hip_openmp_mpi: variables: SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga - allow_failure: true +##allow_failure: true diff --git a/tpl/RAJA b/tpl/RAJA index 573c40786..4bcc2e3de 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 573c40786f09b8fcaa1746ae8cd910a4dfdef1ea +Subproject commit 4bcc2e3de9a70011ce244a7559b6e41a0014ace3 From e1c42e6813bc76c0b7f4b009464732e50628d180 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" <51493078+adrienbernede@users.noreply.github.com> Date: Fri, 25 Oct 2024 23:08:25 +0200 Subject: [PATCH 44/49] From RAJA: From RSC: Set both ENABLE_SYCL and RAJA_ENABLE_SYCL --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 4bcc2e3de..e6bfd3ea8 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 4bcc2e3de9a70011ce244a7559b6e41a0014ace3 +Subproject commit e6bfd3ea86c62073a766f93991dfb0217efedba9 From 01bf5b28fa0f2d916a72f4f4ae16b3af46df1a7f Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Mon, 28 Oct 2024 13:22:27 -0700 Subject: [PATCH 45/49] Don't unload rocm module on corona --- scripts/gitlab/build_and_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index 44a89a1f3..984c834d4 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -241,7 +241,7 @@ then fi date - if [[ "${truehostname}" == "corona" || "${truehostname}" == "tioga" ]] + if [[ "${truehostname}" == "tioga" ]] then module unload rocm fi From 58ff1da1a5be0039f9b5770f0c6e9da7b723ed00 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 29 Oct 2024 12:52:46 -0700 Subject: [PATCH 46/49] Pull in new radiuss-spack-configs via RAJA --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index e6bfd3ea8..06e9df69f 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit e6bfd3ea86c62073a766f93991dfb0217efedba9 +Subproject commit 06e9df69f014060f3184d4e111f746e839435ff2 From 314f510d61e1c491888a5a06d4335211d8d40584 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 29 Oct 2024 13:09:27 -0700 Subject: [PATCH 47/49] Update some specs and remove allow failure --- .gitlab/jobs/corona.yml | 1 - .gitlab/jobs/poodle.yml | 4 ++-- .gitlab/jobs/ruby.yml | 4 ++-- .gitlab/jobs/tioga.yml | 1 - 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index edd2c23d7..65d835370 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -32,4 +32,3 @@ clang_19_0_0_sycl_gcc_10_3_1_rocmcc_5_7_1_hip: SPEC: " ~shared +sycl ~openmp tests=basic %clang@=19.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" ^blt@develop" MODULE_LIST: "rocm/5.7.1" extends: .job_on_corona -##allow_failure: true diff --git a/.gitlab/jobs/poodle.yml b/.gitlab/jobs/poodle.yml index df56107a6..cdd0b018d 100644 --- a/.gitlab/jobs/poodle.yml +++ b/.gitlab/jobs/poodle.yml @@ -43,7 +43,7 @@ gcc_10_3_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2022_1_0_mpi: +intel_2023_2_1_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" + SPEC: "~shared +openmp +mpi +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ^mvapich2 ^blt@develop" extends: .job_on_poodle diff --git a/.gitlab/jobs/ruby.yml b/.gitlab/jobs/ruby.yml index 1beb5f893..4d74eec78 100644 --- a/.gitlab/jobs/ruby.yml +++ b/.gitlab/jobs/ruby.yml @@ -44,7 +44,7 @@ gcc_10_3_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -intel_2022_1_0_mpi: +intel_2023_2_1_mpi: variables: - SPEC: "~shared +openmp +mpi %intel@=2022.1.0 ^mvapich2 ^blt@develop" + SPEC: "~shared +openmp +mpi +lowopttest cxxflags==-fp-model=precise %intel@=2023.2.1 ^mvapich2 ^blt@develop" extends: .job_on_ruby diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index 567737a6e..d8a43062a 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -36,4 +36,3 @@ rocmcc_6_2_0_hip_openmp_mpi: variables: SPEC: "~shared +rocm +openmp +mpi amdgpu_target=gfx90a %rocmcc@=6.2.0 ^hip@6.2.0 ^blt@develop" extends: .job_on_tioga -##allow_failure: true From 89f161827cc0b3f5a43afd1f04a57a4101c86d6d Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 31 Oct 2024 11:32:50 -0700 Subject: [PATCH 48/49] Type and code consistency --- src/lcals/FIRST_MIN-Cuda.cpp | 15 +++++++------ src/lcals/FIRST_MIN-Hip.cpp | 35 +++++++++++++++---------------- src/lcals/FIRST_MIN-OMP.cpp | 13 ++++++------ src/lcals/FIRST_MIN-OMPTarget.cpp | 6 +++--- src/lcals/FIRST_MIN-Seq.cpp | 13 ++++++------ src/lcals/FIRST_MIN-Sycl.cpp | 6 +++--- src/lcals/FIRST_MIN.hpp | 2 +- 7 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/lcals/FIRST_MIN-Cuda.cpp b/src/lcals/FIRST_MIN-Cuda.cpp index 11d11b46a..3e7de06ef 100644 --- a/src/lcals/FIRST_MIN-Cuda.cpp +++ b/src/lcals/FIRST_MIN-Cuda.cpp @@ -114,8 +114,6 @@ void FIRST_MIN::runCudaVariantBase(VariantID vid) template < size_t block_size, typename MappingHelper > void FIRST_MIN::runCudaVariantRAJA(VariantID vid) { - using reduction_policy = RAJA::cuda_reduce; - using exec_policy = std::conditional_t, RAJA::cuda_exec_occ_calc>; @@ -133,15 +131,16 @@ void FIRST_MIN::runCudaVariantRAJA(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); @@ -171,8 +170,8 @@ void FIRST_MIN::runCudaVariantRAJANewReduce(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::ValLoc tminloc(m_xmin_init, - m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), @@ -180,7 +179,7 @@ void FIRST_MIN::runCudaVariantRAJANewReduce(VariantID vid) [=] __device__ (Index_type i, RAJA::expt::ValLocOp& minloc) { - minloc.minloc(x[i], i); + FIRST_MIN_BODY_RAJA; } ); diff --git a/src/lcals/FIRST_MIN-Hip.cpp b/src/lcals/FIRST_MIN-Hip.cpp index b602b4fca..dda9793bf 100644 --- a/src/lcals/FIRST_MIN-Hip.cpp +++ b/src/lcals/FIRST_MIN-Hip.cpp @@ -114,8 +114,6 @@ void FIRST_MIN::runHipVariantBase(VariantID vid) template < size_t block_size, typename MappingHelper > void FIRST_MIN::runHipVariantRAJA(VariantID vid) { - using reduction_policy = RAJA::hip_reduce; - using exec_policy = std::conditional_t, RAJA::hip_exec_occ_calc>; @@ -133,15 +131,16 @@ void FIRST_MIN::runHipVariantRAJA(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall( res, RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); @@ -171,20 +170,20 @@ void FIRST_MIN::runHipVariantRAJANewReduce(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::ValLoc tminloc(m_xmin_init, - m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); - RAJA::forall( res, - RAJA::RangeSegment(ibegin, iend), - RAJA::expt::Reduce(&tminloc), - [=] __device__ (Index_type i, - RAJA::expt::ValLocOp& minloc) { - minloc.minloc(x[i], i); - } - ); - - m_minloc = static_cast(tminloc.getLoc()); + RAJA::forall( res, + RAJA::RangeSegment(ibegin, iend), + RAJA::expt::Reduce(&tminloc), + [=] __device__ (Index_type i, + RAJA::expt::ValLocOp& minloc) { + FIRST_MIN_BODY_RAJA; + } + ); + + m_minloc = static_cast(tminloc.getLoc()); } stopTimer(); diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index 0a90546ca..c73732473 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -94,15 +94,16 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); @@ -112,8 +113,8 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::ValLoc tminloc(m_xmin_init, - m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), @@ -121,7 +122,7 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t tune_idx) [=](Index_type i, RAJA::expt::ValLocOp& minloc) { - minloc.minloc(x[i], i); + FIRST_MIN_BODY_RAJA; } ); diff --git a/src/lcals/FIRST_MIN-OMPTarget.cpp b/src/lcals/FIRST_MIN-OMPTarget.cpp index 906c73127..29d890152 100644 --- a/src/lcals/FIRST_MIN-OMPTarget.cpp +++ b/src/lcals/FIRST_MIN-OMPTarget.cpp @@ -63,8 +63,8 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::ValLoc tminloc(m_xmin_init, - m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall>( RAJA::RangeSegment(ibegin, iend), @@ -72,7 +72,7 @@ void FIRST_MIN::runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG [=](Index_type i, RAJA::expt::ValLocOp& minloc) { - minloc.minloc(x[i], i); + FIRST_MIN_BODY_RAJA; } ); diff --git a/src/lcals/FIRST_MIN-Seq.cpp b/src/lcals/FIRST_MIN-Seq.cpp index 89bd3c4a0..9d8ea6684 100644 --- a/src/lcals/FIRST_MIN-Seq.cpp +++ b/src/lcals/FIRST_MIN-Seq.cpp @@ -86,15 +86,16 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); + RAJA::ReduceMinLoc minloc(m_xmin_init, + m_initloc); RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { FIRST_MIN_BODY_RAJA; }); - m_minloc = loc.getLoc(); + m_minloc = minloc.getLoc(); } stopTimer(); @@ -104,8 +105,8 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::ValLoc tminloc(m_xmin_init, - m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall(res, RAJA::RangeSegment(ibegin, iend), @@ -113,7 +114,7 @@ void FIRST_MIN::runSeqVariant(VariantID vid, size_t tune_idx) [=](Index_type i, RAJA::expt::ValLocOp& minloc) { - minloc.minloc(x[i], i); + FIRST_MIN_BODY_RAJA; } ); diff --git a/src/lcals/FIRST_MIN-Sycl.cpp b/src/lcals/FIRST_MIN-Sycl.cpp index 6cd00ea38..dddcc5aae 100644 --- a/src/lcals/FIRST_MIN-Sycl.cpp +++ b/src/lcals/FIRST_MIN-Sycl.cpp @@ -87,8 +87,8 @@ void FIRST_MIN::runSyclVariantImpl(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::expt::ValLoc tminloc(m_xmin_init, - m_initloc); + RAJA::expt::ValLoc tminloc(m_xmin_init, + m_initloc); RAJA::forall< RAJA::sycl_exec >( res, @@ -97,7 +97,7 @@ void FIRST_MIN::runSyclVariantImpl(VariantID vid) [=] (Index_type i, RAJA::expt::ValLocOp& minloc) { - minloc.minloc(x[i], i); + FIRST_MIN_BODY_RAJA; } ); diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index a0d4a6786..f00100385 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -33,7 +33,7 @@ } #define FIRST_MIN_BODY_RAJA \ - loc.minloc(x[i], i); + minloc.minloc(x[i], i); #include "common/RPTypes.hpp" From b54d8f762afea390331439416d30482bc3c840be Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 31 Oct 2024 12:56:05 -0700 Subject: [PATCH 49/49] Update to latest RAJA develop --- tpl/RAJA | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/RAJA b/tpl/RAJA index 06e9df69f..2fcd22ee2 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 06e9df69f014060f3184d4e111f746e839435ff2 +Subproject commit 2fcd22ee2a1c62747b6481d6331dfae6526adc32