Skip to content

Commit

Permalink
Merge pull request #1530 from LLNL/v2023.06.1-RC
Browse files Browse the repository at this point in the history
V2023.06.1 rc
  • Loading branch information
rhornung67 authored Aug 15, 2023
2 parents e330b25 + e7ee7f8 commit 9b5f61e
Show file tree
Hide file tree
Showing 40 changed files with 505 additions and 229 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ stages:
include:
- local: '.gitlab/custom-jobs-and-variables.yml'
- project: 'radiuss/radiuss-shared-ci'
ref: v2023.03.1
ref: v2023.06.0
file: '${CI_MACHINE}-build-and-test.yml'
- local: '.gitlab/${CI_MACHINE}-build-and-test-extra.yml'
strategy: depend
Expand Down
4 changes: 2 additions & 2 deletions .gitlab/corona-build-and-test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
# ${PROJECT_<MACHINE>_DEPS} in the extra jobs. There is no reason not to fully
# describe the spec here.

rocmcc_5_4_1_hip_desul_atomics:
rocmcc_5_5_0_hip_desul_atomics:
variables:
SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@5.4.1 ^hip@5.4.1 ^blt@develop"
SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@5.5.0 ^hip@5.5.0 ^blt@develop"
extends: .build_and_test_on_corona

5 changes: 5 additions & 0 deletions .gitlab/lassen-build-and-test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_7_0:
# ${PROJECT_<MACHINE>_DEPS} in the extra jobs. There is no reason not to fully
# describe the spec here.

gcc_8_3_1_omptask:
variables:
SPEC: " ~shared +openmp +omptask +tests %gcc@8.3.1"
extends: .build_and_test_on_lassen

gcc_8_3_1_cuda_11_5_0_ats_disabled:
extends: .build_and_test_on_lassen
variables:
Expand Down
17 changes: 16 additions & 1 deletion .gitlab/ruby-build-and-test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,22 @@
# We keep ${PROJECT_<MACHINE>_VARIANTS} and ${PROJECT_<MACHINE>_DEPS} So that
# the comparison with the original job is easier.

# No overridden jobs so far.
clang_14_0_6:
variables:
SPEC: " ~shared +openmp +omptask +tests %clang@14.0.6"
extends: .build_and_test_on_ruby

gcc_10_3_1:
variables:
SPEC: " ~shared +openmp +omptask +tests %gcc@10.3.1"
RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1"
extends: .build_and_test_on_ruby

intel_19_1_2_gcc_8_5_0:
variables:
SPEC: " ~shared +openmp +omptask +tests %intel@19.1.2.gcc.8.5.0"
RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=90 --nodes=1"
extends: .build_and_test_on_ruby

############
# Extra jobs
Expand Down
2 changes: 1 addition & 1 deletion .gitlab/tioga-build-and-test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ rocmcc_5_4_3_hip_desul_atomics:

rocmcc_5_4_3_hip_openmp:
variables:
SPEC: "~shared +rocm +openmp +tests amdgpu_target=gfx90a %rocmcc@5.4.3 ^hip@5.4.3 ^blt@develop"
SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@5.4.3 ^hip@5.4.3 ^blt@develop"
extends: .build_and_test_on_tioga
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ endif()
include(CMakeDependentOption)

# Set version number
set(RAJA_VERSION_MAJOR 2022)
set(RAJA_VERSION_MINOR 10)
set(RAJA_VERSION_PATCHLEVEL 5)
set(RAJA_VERSION_MAJOR 2023)
set(RAJA_VERSION_MINOR 06)
set(RAJA_VERSION_PATCHLEVEL 1)

if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")
Expand Down
86 changes: 49 additions & 37 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,35 @@ COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN cmake -DCMAKE_CXX_COMPILER=g++ -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_TBB=On -DRAJA_DEPRECATED_TESTS=On -DENABLE_OPENMP=On .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/gcc-ubuntu-20.04:gcc-8.1.0 AS gcc8.1.0
ENV GTEST_COLOR=1
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN cmake -DCMAKE_CXX_COMPILER=g++ -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_COVERAGE=On -DRAJA_ENABLE_TBB=On -DENABLE_OPENMP=On .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/gcc-ubuntu-20.04:gcc-9.4.0 AS gcc9.4.0
ENV GTEST_COLOR=1
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN cmake -DCMAKE_CXX_COMPILER=g++ -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_TBB=On -DRAJA_ENABLE_RUNTIME_PLUGINS=On -DENABLE_OPENMP=On .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/gcc-ubuntu-20.04:gcc-11.2.0 AS gcc11.2.0
ENV GTEST_COLOR=1
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_COMPILER=g++ -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_TBB=On -DRAJA_ENABLE_BOUNDS_CHECK=ON -DENABLE_OPENMP=On .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/clang-ubuntu-20.04:llvm-11.0.0 AS clang11.0.0
ENV GTEST_COLOR=1
Expand All @@ -44,7 +48,8 @@ WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && export LD_LIBRARY_PATH=/opt/view/lib:$LD_LIBRARY_PATH && \
cmake -DCMAKE_CXX_COMPILER=clang++ -DRAJA_ENABLE_TBB=On -DENABLE_OPENMP=On .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/clang-ubuntu-20.04:llvm-11.0.0 AS clang11.0.0-debug
ENV GTEST_COLOR=1
Expand All @@ -53,7 +58,8 @@ WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && export LD_LIBRARY_PATH=/opt/view/lib:$LD_LIBRARY_PATH && \
cmake -DCMAKE_CXX_COMPILER=clang++ -DENABLE_OPENMP=On -DCMAKE_BUILD_TYPE=Debug .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/clang-ubuntu-22.04:llvm-13.0.0 AS clang13.0.0
ENV GTEST_COLOR=1
Expand All @@ -62,40 +68,45 @@ WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && export LD_LIBRARY_PATH=/opt/view/lib:$LD_LIBRARY_PATH && \
cmake -DCMAKE_CXX_COMPILER=clang++ -DENABLE_OPENMP=On -DCMAKE_BUILD_TYPE=Release .. && \
make -j 6 &&\
ctest -T test --output-on-failure
ctest -T test --output-on-failure && \
cd .. && rm -rf build

FROM ghcr.io/rse-ops/cuda:cuda-10.1.243-ubuntu-18.04 AS nvcc10.1.243
ENV GTEST_COLOR=1
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \
cmake -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \
make -j 4
##FROM ghcr.io/rse-ops/cuda:cuda-10.1.243-ubuntu-18.04 AS nvcc10.1.243
##ENV GTEST_COLOR=1
##COPY . /home/raja/workspace
##WORKDIR /home/raja/workspace/build
##RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \
## cmake -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \
## make -j 4 && \
## cd .. && rm -rf build

FROM ghcr.io/rse-ops/cuda-ubuntu-20.04:cuda-11.1.1 AS nvcc11.1.1
ENV GTEST_COLOR=1
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \
cmake -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \
make -j 4
##FROM ghcr.io/rse-ops/cuda-ubuntu-20.04:cuda-11.1.1 AS nvcc11.1.1
##ENV GTEST_COLOR=1
##COPY . /home/raja/workspace
##WORKDIR /home/raja/workspace/build
##RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \
## cmake -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \
## make -j 4 && \
## cd .. && rm -rf build

FROM ghcr.io/rse-ops/cuda-ubuntu-20.04:cuda-11.1.1 AS nvcc11.1.-debug
ENV GTEST_COLOR=1
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \
make -j 4
##FROM ghcr.io/rse-ops/cuda-ubuntu-20.04:cuda-11.1.1 AS nvcc11.1.1-debug
##ENV GTEST_COLOR=1
##COPY . /home/raja/workspace
##WORKDIR /home/raja/workspace/build
##RUN . /opt/spack/share/spack/setup-env.sh && spack load cuda && \
## cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_STANDARD=14 -DCMAKE_CUDA_ARCHITECTURES=70 -DENABLE_OPENMP=On .. && \
## make -j 4 && \
## cd .. && rm -rf build

FROM ghcr.io/rse-ops/hip-ubuntu-20.04:hip-5.1.3 AS hip5.1.3
ENV GTEST_COLOR=1
ENV HCC_AMDGPU_TARGET=gfx900
COPY . /home/raja/workspace
WORKDIR /home/raja/workspace/build
RUN . /opt/spack/share/spack/setup-env.sh && spack load hip llvm-amdgpu && \
cmake -DCMAKE_CXX_COMPILER=clang++ -DHIP_PATH=/opt -DENABLE_HIP=On -DENABLE_CUDA=Off -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \
make -j 6
##FROM ghcr.io/rse-ops/hip-ubuntu-20.04:hip-5.1.3 AS hip5.1.3
##ENV GTEST_COLOR=1
##ENV HCC_AMDGPU_TARGET=gfx900
##COPY . /home/raja/workspace
##WORKDIR /home/raja/workspace/build
##RUN . /opt/spack/share/spack/setup-env.sh && spack load hip llvm-amdgpu && \
## cmake -DCMAKE_CXX_COMPILER=clang++ -DHIP_PATH=/opt -DENABLE_HIP=On -DENABLE_CUDA=Off -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \
## make -j 6 && \
## cd .. && rm -rf build

FROM ghcr.io/rse-ops/intel-ubuntu-22.04:intel-2022.1.0 AS sycl
ENV GTEST_COLOR=1
Expand All @@ -104,4 +115,5 @@ WORKDIR /home/raja/workspace/build
RUN /bin/bash -c "source /opt/view/setvars.sh && \
cmake -DCMAKE_CXX_COMPILER=dpcpp -DRAJA_ENABLE_SYCL=On -DENABLE_OPENMP=Off -DENABLE_ALL_WARNINGS=Off -DBLT_CXX_STD=c++17 .. && \
make -j 6 &&\
ctest -T test --output-on-failure"
ctest -T test --output-on-failure" && \
cd .. && rm -rf build
31 changes: 30 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[comment]: # (# SPDX-License-Identifier: BSD-3-Clause)
[comment]: # (#################################################################)

Version vxx.yy.zz -- Release date 20yy-mm-dd
Version YYYY.MM.PP -- Release date 20yy-mm-dd
============================================

This release contains ...
Expand All @@ -20,6 +20,35 @@ Notable changes include:
* Bug fixes/improvements:


Version 2023.06.1 -- Release date 2023-08-16
============================================

This release contains various smaller RAJA improvements.

Notable changes include:

* New features / API changes:
* Add compile time block size optimization for new reduction interface.
* Changed default stream usage for Workgroup constructs to use the
stream associated with the default (camp) resource. Previously, we were
using stream zero. Specifically, this change affects where we memset
memory in the zeroed device memory pool and where we get device function
pointers for WorkGroup.

* Build changes/improvements:
* RAJA_ENABLE_OPENMP_TASK CMake option added to enable/disable algorithm
options based on OpenMP task construct. Currently, this only applies
to RAJA's OpenMP sort implementation. The default is 'Off'. The option
allows users to choose a task implementation if they wish.
* Resolve several compiler warnings.

* Bug fixes/improvements:
* Fix compilation of GPU occupancy calculator and use common types for
HIP and CUDA backends in the occupancy calculator, kernel policies,
and kernel launch helper routines.
* Fix direct cudaMalloc/hipMalloc calls and memory leaks.


Version 2023.06.0 -- Release date 2023-07-06
============================================

Expand Down
12 changes: 6 additions & 6 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ jobs:
docker_target: clang11.0.0-debug
clang13.0.0:
docker_target: clang13.0.0
nvcc10.1.243:
docker_target: nvcc10.1.243
nvcc11.1.1:
docker_target: nvcc11.1.1
## nvcc10.1.243:
## docker_target: nvcc10.1.243
## nvcc11.1.1:
## docker_target: nvcc11.1.1
## nvcc11.1.1-debug:
## docker_target: nvcc11.1.1-debug
hip5.1.3:
docker_target: hip5.1.3
## hip5.1.3:
## docker_target: hip5.1.3
sycl:
docker_target: sycl
pool:
Expand Down
2 changes: 2 additions & 0 deletions cmake/SetupRajaOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ option(RAJA_ENABLE_SYCL "Build SYCL support" Off)

option(RAJA_ENABLE_VECTORIZATION "Build experimental vectorization support" On)

option(RAJA_ENABLE_OPENMP_TASK "Build OpenMP task variants of certain algorithms" Off)

option(RAJA_ENABLE_REPRODUCERS "Build issue reproducers" Off)

option(RAJA_ENABLE_EXERCISES "Build exercises " On)
Expand Down
35 changes: 30 additions & 5 deletions docs/sphinx/user_guide/feature/policies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,37 @@ policies have the prefix ``hip_``.
CUDA/HIP Execution Policies Works with Brief description
========================================= ============= =======================================
cuda/hip_exec<BLOCK_SIZE> forall, Execute loop iterations
scan, in a GPU kernel launched
sort with given thread-block
size. Note that the
scan, directly mapped to global threads
sort in a GPU kernel launched
with given thread-block
size and unbounded grid size.
Note that the thread-block
size must be provided,
there is no default.
cuda/hip_exec_grid<BLOCK_SIZE, GRID_SIZE> forall, Execute loop iterations
mapped to global threads via
grid striding with multiple
iterations per global thread
in a GPU kernel launched
with given thread-block
size and grid size.
Note that the thread-block
size and grid size must be
provided, there is no default.
cuda/hip_exec_occ_calc<BLOCK_SIZE> forall Execute loop iterations
mapped to global threads via
grid striding with multiple
iterations per global thread
in a GPU kernel launched
with given thread-block
size and grid size bounded
by the maximum occupancy of
the kernel. Note that the
thread-block size must
be provided, there is
no default.
be provided, there is no
default. Note this can improve
reducer performance in kernels
with large iteration counts.
cuda/hip_launch_t launch Launches a device kernel,
any code expressed within
the lambda is executed
Expand Down
2 changes: 1 addition & 1 deletion host-configs/lc-builds/toss3/icpc_X_gcc8headers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

set(RAJA_COMPILER "RAJA_COMPILER_ICC" CACHE STRING "")

set(COMMON_FLAGS "-gxx-name=/usr/tce/packages/gcc/gcc-8.1.0/bin/g++")
set(COMMON_FLAGS "-gxx-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++")

set(CMAKE_CXX_FLAGS_RELEASE "${COMMON_FLAGS} -O3 -march=native -ansi-alias -diag-disable cpu-dispatch" CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${COMMON_FLAGS} -O3 -g -march=native -ansi-alias -diag-disable cpu-dispatch" CACHE STRING "")
Expand Down
18 changes: 18 additions & 0 deletions host-configs/lc-builds/toss4/icpc-classic_X.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
###############################################################################
# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC
# and RAJA project contributors. See the RAJA/LICENSE file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

# Host-config cache entries for building RAJA with the Intel classic C++
# compiler (icpc) on a TOSS4 system.

# Compiler identifier string consumed by RAJA's build system.
set(RAJA_COMPILER
    "RAJA_COMPILER_ICC"
    CACHE STRING "")

# Flags shared by every build type below. -gxx-name names the g++ executable
# (GCC 10.3.1 from /usr/tce) that is passed to the Intel compiler.
set(COMMON_FLAGS
    "-gxx-name=/usr/tce/packages/gcc/gcc-10.3.1/bin/g++")

# Per-build-type C++ flags; each one starts from COMMON_FLAGS.
set(CMAKE_CXX_FLAGS_DEBUG
    "${COMMON_FLAGS} -O0 -g"
    CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
    "${COMMON_FLAGS} -O3 -g -march=native -ansi-alias -diag-disable cpu-dispatch"
    CACHE STRING "")
set(CMAKE_CXX_FLAGS_RELEASE
    "${COMMON_FLAGS} -O3 -march=native -ansi-alias -diag-disable cpu-dispatch"
    CACHE STRING "")

# Data alignment value handed to RAJA (presumably in bytes -- TODO confirm).
set(RAJA_DATA_ALIGN
    64
    CACHE STRING "")

# Marker recording that a host-config file has been loaded.
set(RAJA_HOST_CONFIG_LOADED
    On
    CACHE BOOL "")
10 changes: 7 additions & 3 deletions include/RAJA/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ static_assert(RAJA_HAS_SOME_CXX14,
#cmakedefine RAJA_ENABLE_CLANG_CUDA
#cmakedefine RAJA_ENABLE_HIP
#cmakedefine RAJA_ENABLE_SYCL

#cmakedefine RAJA_ENABLE_OMP_TASK
#cmakedefine RAJA_ENABLE_VECTORIZATION

#cmakedefine RAJA_ENABLE_NV_TOOLS_EXT
Expand Down Expand Up @@ -254,12 +256,14 @@ namespace RAJA {
#if defined(RAJA_ENABLE_OPENMP) && !defined(__HIP_DEVICE_COMPILE__)
#if defined(_OPENMP)
#if (_OPENMP >= 200805)
#define RAJA_ENABLE_OPENMP_TASK
#if defined(RAJA_ENABLE_OPENMP_TASK)
#define RAJA_ENABLE_OPENMP_TASK_INTERNAL
#endif
#endif // _OPENMP >= 200805
#else
#error RAJA configured with RAJA_ENABLE_OPENMP, but _OPENMP is not defined in this code section
#endif // _OPENMP
#endif // RAJA_ENABLE_OPENMP && __HIP_DEVICE_COMPILE__
#endif // else
#endif // RAJA_ENABLE_OPENMP && !__HIP_DEVICE_COMPILE__

#if defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__)
#define RAJA_CUDA_ACTIVE
Expand Down
Loading

0 comments on commit 9b5f61e

Please sign in to comment.