diff --git a/.gitignore b/.gitignore index 278c78091b..69f928be3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ *.pyc *.o +*.a *.exe +*.gch build/ +build-*/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..0fdd3ab4f8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "blt"] + path = blt + url = https://github.com/LLNL/blt.git diff --git a/.travis.yml b/.travis.yml index b91fda4cbe..2902989e6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,60 +13,64 @@ matrix: addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } } env: - COMPILER=g++-4.9 - - CMAKE_EXTRA_FLAGS="-DRAJA_ENABLE_WARNINGS=On" + - CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On -DENABLE_TBB=On" - compiler: gcc-6 addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } } env: - COMPILER=g++-6 - - CMAKE_EXTRA_FLAGS="-DRAJA_ENABLE_WARNINGS=On" + - CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On -DENABLE_TBB=On" - compiler: gcc-7 addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-7, libtbb-dev ] } } env: - COMPILER=g++-7 - - CMAKE_EXTRA_FLAGS="-DRAJA_ENABLE_WARNINGS=On" + - CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On -DENABLE_TBB=On" - compiler: clang-5 addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } } env: - COMPILER=clang++-5.0.0 - LLVM_VERSION=5.0.0 - DOWNLOAD_URL=http://releases.llvm.org/5.0.0/clang+llvm-5.0.0-linux-x86_64-ubuntu14.04.tar.xz + - CMAKE_EXTRA_FLAGS="-DCMAKE_CXX_FLAGS=-fmodules -DENABLE_TBB=On" - compiler: clang-3.9 addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } } env: - COMPILER=clang++-3.9.1 - LLVM_VERSION=3.9.1 + - CMAKE_EXTRA_FLAGS="-DENABLE_TBB=On" - compiler: clang-4.0 addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } } env: - COMPILER=clang++-4.0.0 - LLVM_VERSION=4.0.0 + - CMAKE_EXTRA_FLAGS="-DENABLE_TBB=On" - compiler: intel-17 env: - 
COMPILER=icpc - TRAVIS_INSTALL_COMPILER="intel" + - CMAKE_EXTRA_FLAGS="-DENABLE_TBB=On" - compiler: nvcc addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } } env: - COMPILER=g++-4.9 - - CMAKE_EXTRA_FLAGS="-DRAJA_ENABLE_CUDA=On" + - CMAKE_EXTRA_FLAGS="-DENABLE_CUDA=On -DENABLE_TBB=On" - TRAVIS_INSTALL_COMPILER="nvcc" - DO_TEST=no - compiler: gcc-4.9-debug addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } } env: - COMPILER=g++-4.9 - - CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DRAJA_ENABLE_COVERAGE=On" + - CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=On -DENABLE_TBB=On" - compiler: clang-3.9-debug addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-6, libtbb-dev ] } } env: - COMPILER=clang++ - LLVM_VERSION=3.9.1 - - CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug" + - CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_TBB=On" - compiler: nvcc-debug addons: { apt: { sources: [ ubuntu-toolchain-r-test ] , packages: [ g++-4.9, libtbb-dev ] } } env: - COMPILER=g++-4.9 - - CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DRAJA_ENABLE_CUDA=On" + - CMAKE_EXTRA_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DENABLE_CUDA=On -DENABLE_TBB=On" - TRAVIS_INSTALL_COMPILER="nvcc" - DO_TEST=no cache: @@ -97,5 +101,5 @@ before_install: script: - ./scripts/travis_build_and_test.sh after_success: -- if [[ "${CMAKE_EXTRA_FLAGS}" == *"RAJA_ENABLE_COVERAGE"* ]] ; then bash <(curl -s https://codecov.io/bash) -a "-f"; fi +- if [[ "${CMAKE_EXTRA_FLAGS}" == *"ENABLE_COVERAGE"* ]] ; then bash <(curl -s https://codecov.io/bash) -a "-f" >& /dev/null; fi - if [[ "${TRAVIS_INSTALL_COMPILER}" == "intel" ]] ; then uninstall_intel_software ; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fae38936a..cbd0ef79a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016, Lawrence Livermore 
National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. # # Produced at the Lawrence Livermore National Laboratory # @@ -9,34 +9,7 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. 
# ############################################################################### @@ -47,30 +20,44 @@ project(RAJA LANGUAGES CXX C) # Set version number set(RAJA_VERSION_MAJOR 0) -set(RAJA_VERSION_MINOR 3) -set(RAJA_VERSION_PATCHLEVEL 1) +set(RAJA_VERSION_MINOR 4) +set(RAJA_VERSION_PATCHLEVEL 0) set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/thirdparty" ${CMAKE_MODULE_PATH}) # Build options +set(ENABLE_OPENMP On CACHE Bool "Build OpenMP support") +set(ENABLE_CUDA Off CACHE Bool "Build CUDA support") +set(ENABLE_COPY_HEADERS Off CACHE Bool "") +set(ENABLE_WARNINGS_AS_ERRORS Off CACHE Bool "") + set(RAJA_CXX_STANDARD_FLAG "default" CACHE STRING "Specific c++ standard flag to use, default attempts to autodetect the highest available") -option(RAJA_ENABLE_OPENMP "Build OpenMP support" On) -option(RAJA_ENABLE_TBB "Build TBB support" On) -option(RAJA_ENABLE_TARGET_OPENMP "Build OpenMP on target device support" Off) -option(RAJA_ENABLE_CUDA "Build CUDA support" Off) -option(RAJA_ENABLE_CLANG_CUDA "Use Clang's native CUDA support" Off) -set(RAJA_CUDA_ARCH "sm_35" CACHE STRING "Compute architecture to pass to CUDA builds") -option(RAJA_ENABLE_CUB "Use cub for scans using CUDA" On) -option(RAJA_ENABLE_TESTS "Build tests" On) -option(RAJA_ENABLE_EXAMPLES "Build simple examples" On) -option(RAJA_ENABLE_NESTED "Enable nested loop support" Off) -option(RAJA_ENABLE_WARNINGS "Enable warnings as errors for CI" Off) -option(RAJA_ENABLE_DOCUMENTATION "Build RAJA documentation" Off) - -option(RAJA_ENABLE_COVERAGE "Enable coverage (only supported with GCC)" Off) + +option(ENABLE_TBB "Build TBB support" Off) +option(ENABLE_TARGET_OPENMP "Build OpenMP on target device support" Off) +option(ENABLE_CLANG_CUDA "Use Clang's native CUDA support" Off) +set(CUDA_ARCH "sm_35" CACHE STRING "Compute architecture to pass to CUDA builds") +option(ENABLE_CUB "Use cub for scans using CUDA" On) +option(ENABLE_TESTS "Build tests" On) +option(ENABLE_EXAMPLES "Build simple examples" On) 
+option(ENABLE_MODULES "Enable modules in supporting compilers (clang)" On) +option(ENABLE_WARNINGS "Enable warnings as errors for CI" Off) +option(ENABLE_DOCUMENTATION "Build RAJA documentation" Off) +option(ENABLE_COVERAGE "Enable coverage (only supported with GCC)" Off) set(TEST_DRIVER "" CACHE STRING "driver used to wrap test commands") +if (NOT BLT_LOADED) +if (NOT EXISTS ${PROJECT_SOURCE_DIR}/blt/SetupBLT.cmake) + message(FATAL_ERROR "\ + The BLT submodule is not present. \ + If in a git repo run the following command:\n\ + git submodule init && git submodule update") +endif() + +include(blt/SetupBLT.cmake) +endif() + # Setup basic CMake options include(cmake/SetupBasics.cmake) # Find third-party packages @@ -81,44 +68,83 @@ include(cmake/SetupCompilers.cmake) include(cmake/SetupRajaConfig.cmake) # Macros for building executables and libraries include (cmake/RAJAMacros.cmake) -# Sanity check for compiler compatibility -include (cmake/CompilerCompatibility.cmake) -include_directories(${PROJECT_BINARY_DIR}/include/RAJA) -include_directories(${PROJECT_BINARY_DIR}/include) +set (raja_sources + src/AlignedRangeIndexSetBuilders.cpp + src/DepGraphNode.cpp + src/LockFreeIndexSetBuilders.cpp + src/MemUtils_CUDA.cpp + src/ThreadUtils_CPU.cpp) + +set (raja_depends) -include_directories(include) +if (ENABLE_OPENMP) + set (raja_depends + openmp) +endif() + +if (ENABLE_CUDA) + set (raja_depends + ${raja_depends} + cuda) +endif () + +if (ENABLE_CUDA) + if (ENABLE_CUB) + set (raja_depends + ${raja_depends} + cub) + endif () +endif () + +if (ENABLE_CHAI) + set (raja_depends + ${raja_depends} + chai) +endif () + +if (ENABLE_TBB) + set(raja_depends + ${raja_depends} + tbb) +endif () + +blt_add_library( + NAME RAJA + SOURCES ${raja_sources} + DEPENDS_ON ${raja_depends}) + +install(TARGETS RAJA + EXPORT RAJA + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION lib +) + +install(EXPORT RAJA DESTINATION share/raja/cmake/) + +target_include_directories(RAJA + 
PUBLIC + $ + $ + $) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN *.hpp) -install(FILES ${PROJECT_BINARY_DIR}/include/RAJA/config.hpp - DESTINATION "include/RAJA") -add_subdirectory(src) +install(FILES + ${PROJECT_BINARY_DIR}/include/RAJA/config.hpp + include/RAJA/module.modulemap + include/RAJA/module.private.modulemap + DESTINATION "include/RAJA/") -if(RAJA_ENABLE_TESTS) +if(ENABLE_TESTS) add_subdirectory(test) endif() -if(RAJA_ENABLE_EXAMPLES) +if(ENABLE_EXAMPLES) add_subdirectory(examples) endif() -if (RAJA_ENABLE_DOCUMENTATION) +if (ENABLE_DOCUMENTATION) add_subdirectory(docs) endif () - -if(RAJA_ENABLE_APPLICATIONS) - if (NOT EXISTS ${PROJECT_SOURCE_DIR}/extra/llnl-raja-proxies/CMakeLists.txt) - message(STATUS "Cloning RAJA proxy applications...") - execute_process(COMMAND git clone https://github.com/LLNL/RAJA-examples.git ${PROJECT_SOURCE_DIR}/extra/llnl-raja-proxies) - endif() - add_subdirectory(extra/llnl-raja-proxies) -endif() - -if(RAJA_ENABLE_PERFSUITE) - if (NOT EXISTS ${PROJECT_SOURCE_DIR}/extra/performance/CMakeLists.txt) - message(STATUS "Cannot find performance suite") - message(STATUS "Clone it to ./extra/performance") - endif() - add_subdirectory(extra/performance) -endif() diff --git a/Dockerfile b/Dockerfile index a3a9702514..a1f93dcdb9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,6 @@ RUN cd /opt/ && git clone https://github.com/LLNL/RAJA.git WORKDIR /opt/RAJA -RUN mkdir build && cd build && cmake -DRAJA_ENABLE_CUDA=ON .. +RUN mkdir build && cd build && cmake -DENABLE_CUDA=ON .. RUN cd build && make -j && make install diff --git a/LICENSE b/LICENSE index 41872267e9..40def96a55 100644 --- a/LICENSE +++ b/LICENSE @@ -1,32 +1,5 @@ -******************************************************************************* - -RAJA: ................................, version 0.3.1 - -Copyright (c) 2016, Lawrence Livermore National Security, LLC. -Produced at the Lawrence Livermore National Laboratory. -All rights reserved. 
See details below. - -Unlimited Open Source - BSD Distribution -LLNL-CODE-689114 -OCEC-16-063 - -The original developers of RAJA are: - -Rich Hornung (hornung1@llnl.gov) -Jeff Keasler (keasler1@llnl.gov) - -Contributors include: - -David Beckingsale (beckingsale1@llnl.gov) -Jason Burmark (burmark1@llnl.gov) -Holger Jones (jones19@llnl.gov) -Will Killian (killian4@llnl.gov) -Adam Kunen (kunen1@llnl.gov) -Olga Pearce (pearce8@llnl.gov) -David Poliakoff (poliakoff1@llnl.gov) -Tom Scogland (scogland1@llnl.gov) - -******************************************************************************* +Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -53,25 +26,3 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Additional BSD Notice - -1. This notice is required to be provided under our contract with the U.S. -Department of Energy (DOE). This work was produced at Lawrence Livermore -National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE. - -2. Neither the United States Government nor Lawrence Livermore National -Security, LLC nor any of their employees, makes any warranty, express or -implied, or assumes any liability or responsibility for the accuracy, -completeness, or usefulness of any information, apparatus, product, or -process disclosed, or represents that its use would not infringe -privately-owned rights. - -3. 
Also, reference herein to any specific commercial products, process, -or services by trade name, trademark, manufacturer or otherwise does not -necessarily constitute or imply its endorsement, recommendation, or favoring -by the United States Government or Lawrence Livermore National Security, LLC. -The views and opinions of authors expressed herein do not necessarily state -or reflect those of the United States Government or Lawrence Livermore -National Security, LLC, and shall not be used for advertising or product -endorsement purposes. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000000..8aea31b91a --- /dev/null +++ b/NOTICE @@ -0,0 +1,21 @@ +Additional BSD Notice + +1. This notice is required to be provided under our contract with the U.S. +Department of Energy (DOE). This work was produced at Lawrence Livermore +National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE. + +2. Neither the United States Government nor Lawrence Livermore National +Security, LLC nor any of their employees, makes any warranty, express or +implied, or assumes any liability or responsibility for the accuracy, +completeness, or usefulness of any information, apparatus, product, or +process disclosed, or represents that its use would not infringe +privately-owned rights. + +3. Also, reference herein to any specific commercial products, process, +or services by trade name, trademark, manufacturer or otherwise does not +necessarily constitute or imply its endorsement, recommendation, or favoring +by the United States Government or Lawrence Livermore National Security, LLC. +The views and opinions of authors expressed herein do not necessarily state +or reflect those of the United States Government or Lawrence Livermore +National Security, LLC, and shall not be used for advertising or product +endorsement purposes. 
diff --git a/README.md b/README.md index 0fd546a8c6..45eab7e860 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -RAJA v0.3.1 +RAJA v0.4.0 ============ [![Build Status](https://travis-ci.org/LLNL/RAJA.svg?branch=develop)](https://travis-ci.org/LLNL/RAJA) @@ -42,7 +42,7 @@ Quick Start The RAJA code lives in a GitHub [repository](https://github.com/llnl/raja). To clone the repo, use the command: - git clone https://github.com/llnl/raja.git + git clone --recursive https://github.com/llnl/raja.git Then, you can build RAJA like any other CMake project, provided you have a C++ compiler that supports the C++11 standard. The simplest way to build the code @@ -62,7 +62,7 @@ Example Applications The [RAJA-examples](https://github.com/LLNL/RAJA-examples) repository contains three proxy applications that use the RAJA programming model. These applications can be built along with the rest of the -RAJA framework by setting `-DRAJA_ENABLE_APPLICATIONS=On` when running CMake. +RAJA framework by setting `-DENABLE_APPLICATIONS=On` when running CMake. When this option is passed to CMake, the RAJA-examples repository is cloned using `git` to the directory `extra/llnl-raja-proxies` in the project root. The example applications will be built using the same configuration that the RAJA library uses. @@ -110,31 +110,24 @@ The original developers of RAJA are: * Rich Hornung (hornung1@llnl.gov) * Jeff Keasler (keasler1@llnl.gov) -Contributors include: - - * David Beckingsale (beckingsale1@llnl.gov) - * Jason Burmark (burmark1@llnl.gov) - * Holger Jones (jones19@llnl.gov) - * Will Killian (killian4@llnl.gov) - * Adam Kunen (kunen1@llnl.gov) - * Olga Pearce (pearce8@llnl.gov) - * David Poliakoff (poliakoff1@llnl.gov) - * Tom Scogland (scogland1@llnl.gov) +Please see the [RAJA Contributors Page](https://github.com/LLNL/RAJA/graphs/contributors) for the full list of contributors to the project. Release ----------- -Copyright (c) 2016, Lawrence Livermore National Security, LLC. 
+Copyright (c) 2016-2017, Lawrence Livermore National Security, LLC. Produced at the Lawrence Livermore National Laboratory. All rights reserved. -Unlimited Open Source - BSD Distribution +`LLNL-CODE-689114` `OCEC-16-063` -For release details and restrictions, please read the LICENSE.txt file. -It is also linked here: -- [LICENSE](./LICENSE.txt) +Unlimited Open Source - BSD Distribution -`LLNL-CODE-689114` `OCEC-16-063` +For release details and restrictions, please read the RELEASE, LICENSE, +and NOTICE files, also linked here: +- [RELEASE](./RELEASE) +- [LICENSE](./LICENSE) +- [NOTICE](./NOTICE) diff --git a/RELEASE b/RELEASE new file mode 100644 index 0000000000..461ec52447 --- /dev/null +++ b/RELEASE @@ -0,0 +1,30 @@ +******************************************************************************* + +RAJA: ................................, version 0.4.0 + +Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +Produced at the Lawrence Livermore National Laboratory. +All rights reserved. See details in RAJA/LICENSE and RAJA/NOTICE files. 
+ +Unlimited Open Source - BSD Distribution +LLNL-CODE-689114 +OCEC-16-063 + +The original developers of RAJA are: + +Rich Hornung (hornung1@llnl.gov) +Jeff Keasler (keasler1@llnl.gov) + +Contributors include: + +David Beckingsale (beckingsale1@llnl.gov) +Jason Burmark (burmark1@llnl.gov) +Matt Cordery (cordery1@llnl.gov) +Jeff Hammond (jeff.science@gmail.com) +Holger Jones (jones19@llnl.gov) +Will Killian (killian4@llnl.gov) +Adam Kunen (kunen1@llnl.gov) +Olga Pearce (pearce8@llnl.gov) +David Poliakoff (poliakoff1@llnl.gov) +Tom Scogland (scogland1@llnl.gov) +Arturo Vargas (vargas45@llnl.gov) diff --git a/appveyor.yml b/appveyor.yml index 5de70791d4..947fb05695 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,6 +3,10 @@ skip_branch_with_pr: true image: Visual Studio 2017 build_script: - cmd: >- + git submodule init + + git submodule update + mkdir build cd build diff --git a/blt b/blt new file mode 160000 index 0000000000..949f45ae30 --- /dev/null +++ b/blt @@ -0,0 +1 @@ +Subproject commit 949f45ae3041bea0072f0bdfd9d53409f03e7201 diff --git a/cmake/CompilerCompatibility.cmake b/cmake/CompilerCompatibility.cmake deleted file mode 100644 index 709a7d5bac..0000000000 --- a/cmake/CompilerCompatibility.cmake +++ /dev/null @@ -1,114 +0,0 @@ -############################################################################### -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. -# -# Produced at the Lawrence Livermore National Laboratory -# -# LLNL-CODE-689114 -# -# All rights reserved. -# -# This file is part of RAJA. -# -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. 
-# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
-# -############################################################################### - -include(CheckCXXSourceCompiles) - -set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) -if (NOT MSVC) - if (CMAKE_CXX_COMPILER_ID MATCHES INTEL) - set (CMAKE_REQUIRED_FLAGS "${COMMON_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}") - else () - set (CMAKE_REQUIRED_FLAGS "${COMMON_FLAGS} -std=c++11") - endif() -endif() - -CHECK_CXX_SOURCE_COMPILES( -"#include -#include - -template -struct signed_limits { - static constexpr T min() - { - return static_cast(1llu << ((8llu * sizeof(T)) - 1llu)); - } - static constexpr T max() - { - return static_cast(~(1llu << ((8llu * sizeof(T)) - 1llu))); - } -}; - -template -struct unsigned_limits { - static constexpr T min() - { - return static_cast(0); - } - static constexpr T max() - { - return static_cast(0xFFFFFFFFFFFFFFFF); - } -}; - -template -struct limits : public std::conditional< - std::is_signed::value, - signed_limits, - unsigned_limits>::type { -}; - -template -void check() { - static_assert(limits::min() == std::numeric_limits::min(), \"min failed\"); - static_assert(limits::max() == std::numeric_limits::max(), \"max failed\"); -} - -int main() { - check(); - check(); - check(); - check(); - check(); - check(); - check(); - check(); - check(); - check(); - check(); - check(); -}" check_power_of_two_integral_types) - -set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - -if(NOT check_power_of_two_integral_types) - message(FATAL_ERROR "RAJA fast limits are unsupported for your compiler/architecture") -endif() diff --git a/cmake/RAJAMacros.cmake b/cmake/RAJAMacros.cmake index 413f6630a9..519a009347 100644 --- a/cmake/RAJAMacros.cmake +++ b/cmake/RAJAMacros.cmake @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. 
# # Produced at the Lawrence Livermore National Laboratory # @@ -9,106 +9,51 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. 
# ############################################################################### macro(raja_add_executable) set(options ) - set(singleValueArgs NAME) + set(singleValueArgs NAME TEST) set(multiValueArgs SOURCES DEPENDS_ON) cmake_parse_arguments(arg "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) - if (RAJA_ENABLE_CHAI) + list (APPEND arg_DEPENDS_ON RAJA) + + if (ENABLE_CHAI) list (APPEND arg_DEPENDS_ON chai) endif () - if (RAJA_ENABLE_CUDA) - if (RAJA_ENABLE_CLANG_CUDA) - add_executable(${arg_NAME} ${arg_SOURCES}) - target_compile_options(${arg_NAME} PRIVATE - -x cuda --cuda-gpu-arch=${RAJA_CUDA_ARCH} --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) - target_include_directories(${arg_NAME} - PUBLIC ${EXPT_CUDA_INCLUDE_LOCATION}) - target_link_libraries(${arg_NAME} ${CUDA_LIBRARIES} RAJA ${arg_DEPENDS_ON}) - else () - set_source_files_properties( - ${arg_SOURCES} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ) - cuda_add_executable(${arg_NAME} ${arg_SOURCES}) - target_link_libraries(${arg_NAME} PUBLIC RAJA ${arg_DEPENDS_ON}) - endif() - else () - add_executable(${arg_NAME} ${arg_SOURCES}) - target_link_libraries(${arg_NAME} RAJA ${arg_DEPENDS_ON}) - endif() -endmacro(raja_add_executable) - -macro(raja_add_library) - set(options ) - set(singleValueArgs NAME) - set(multiValueArgs SOURCES DEPENDS_ON) - - cmake_parse_arguments(arg - "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) - - if (RAJA_ENABLE_CHAI) - list (APPEND arg_DEPENDS_ON chai) + if (ENABLE_OPENMP) + list (APPEND arg_DEPENDS_ON openmp) endif () - if (RAJA_ENABLE_CUDA) - if (RAJA_ENABLE_CLANG_CUDA) + if (ENABLE_CUDA) + list (APPEND arg_DEPENDS_ON cuda) + endif () - add_library(${arg_NAME} ${arg_SOURCES}) - target_compile_options(${arg_NAME} PRIVATE - -x cuda --cuda-gpu-arch=${RAJA_CUDA_ARCH} --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) - target_include_directories(${arg_NAME} - PUBLIC ${EXPT_CUDA_INCLUDE_LOCATION}) - target_link_libraries(${arg_NAME} ${CUDA_LIBRARIES}) + if (ENABLE_TBB) + list 
(APPEND arg_DEPENDS_ON tbb) + endif () - else () - set_source_files_properties( - ${arg_SOURCES} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ) + message(STATUS "${arg_NAME} building with depends: ${arg_DEPENDS_ON}") - cuda_add_library(${arg_NAME} ${arg_SOURCES}) - endif () + if (${arg_TEST}) + set (_output_dir test) else () - add_library(${arg_NAME} ${arg_SOURCES}) - endif () + set (_output_dir bin) + endif() -endmacro(raja_add_library) + blt_add_executable( + NAME ${arg_NAME} + SOURCES ${arg_SOURCES} + DEPENDS_ON ${arg_DEPENDS_ON} + OUTPUT_DIR ${_output_dir} + ) +endmacro(raja_add_executable) macro(raja_add_test) set(options ) @@ -118,13 +63,16 @@ macro(raja_add_test) cmake_parse_arguments(arg "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN}) - list (APPEND arg_DEPENDS_ON gtest gtest_main ${CMAKE_THREAD_LIBS_INIT}) + list (APPEND arg_DEPENDS_ON gtest ${CMAKE_THREAD_LIBS_INIT}) raja_add_executable( NAME ${arg_NAME}.exe SOURCES ${arg_SOURCES} - DEPENDS_ON ${arg_DEPENDS_ON}) + DEPENDS_ON ${arg_DEPENDS_ON} + TEST On) - add_test(NAME ${arg_NAME} - COMMAND ${TEST_DRIVER} $) + blt_add_test( + NAME ${arg_NAME} + #COMMAND ${TEST_DRIVER} $) + COMMAND ${TEST_DRIVER} ${arg_NAME}) endmacro(raja_add_test) diff --git a/cmake/SetupBasics.cmake b/cmake/SetupBasics.cmake index cd11ee52d9..ef6bec528f 100644 --- a/cmake/SetupBasics.cmake +++ b/cmake/SetupBasics.cmake @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. # # Produced at the Lawrence Livermore National Laboratory # @@ -9,44 +9,10 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. # ############################################################################### -# Don't allow in-source builds -if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) - message(FATAL_ERROR "In-source builds are not supported. 
Please remove \ - CMakeCache.txt from the 'src' dir and configure an out-of-source build in \ - another directory.") - endif() - if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, \ options are: Debug Release RelWithDebInfo" FORCE) diff --git a/cmake/SetupCompilers.cmake b/cmake/SetupCompilers.cmake index 5a719b9bf9..d80d5e2552 100644 --- a/cmake/SetupCompilers.cmake +++ b/cmake/SetupCompilers.cmake @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. # # Produced at the Lawrence Livermore National Laboratory # @@ -9,34 +9,7 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. 
DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. # ############################################################################### @@ -45,6 +18,7 @@ set(COMPILERS_KNOWN_TO_CMAKE33 AppleClang Clang GNU MSVC) include(CheckCXXCompilerFlag) if(RAJA_CXX_STANDARD_FLAG MATCHES default) if("cxx_std_17" IN_LIST CMAKE_CXX_KNOWN_FEATURES) + #TODO set BLT_CXX_STANDARD set(CMAKE_CXX_STANDARD 17) elseif("cxx_std_14" IN_LIST CMAKE_CXX_KNOWN_FEATURES) set(CMAKE_CXX_STANDARD 14) @@ -71,16 +45,16 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") -if (RAJA_ENABLE_WARNINGS) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") -endif () +if (RAJA_ENABLE_MODULES AND CMAKE_CXX_COMPILER_ID MATCHES Clang) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmodules") +endif() if (CMAKE_CXX_COMPILER_ID MATCHES GNU) if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) message(FATAL_ERROR "RAJA requires GCC 4.9 or greater!") endif () - if (RAJA_ENABLE_COVERAGE) - if(NOT RAJA_ENABLE_CUDA) + if (ENABLE_COVERAGE) + if(NOT ENABLE_CUDA) message(INFO "Coverage analysis enabled") set(CMAKE_CXX_FLAGS "-coverage ${CMAKE_CXX_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "-coverage ${CMAKE_EXE_LINKER_FLAGS}") @@ -102,8 +76,7 @@ if ( MSVC ) endif() endif() -if (RAJA_ENABLE_CUDA) - +if 
(ENABLE_CUDA) if ( NOT DEFINED RAJA_NVCC_STD ) set(RAJA_NVCC_STD "c++11") # When we require cmake 3.8+, replace this with setting CUDA_STANDARD @@ -119,28 +92,29 @@ if (RAJA_ENABLE_CUDA) endif() if (NOT RAJA_HOST_CONFIG_LOADED) - if(CMAKE_BUILD_TYPE MATCHES Release) - set(RAJA_NVCC_FLAGS -O2; -restrict; -arch ${RAJA_CUDA_ARCH}; -std ${RAJA_NVCC_STD}; --expt-extended-lambda; -ccbin; ${CMAKE_CXX_COMPILER} CACHE LIST "") - elseif(CMAKE_BUILD_TYPE MATCHES Debug) - set(RAJA_NVCC_FLAGS -g; -G; -O0; -restrict; -arch ${RAJA_CUDA_ARCH}; -std ${RAJA_NVCC_STD}; --expt-extended-lambda; -ccbin ${CMAKE_CXX_COMPILER} CACHE LIST "") - elseif(CMAKE_BUILD_TYPE MATCHES MinSizeRel) - set(RAJA_NVCC_FLAGS -Os; -restrict; -arch ${RAJA_CUDA_ARCH}; -std ${RAJA_NVCC_STD}; --expt-extended-lambda; -ccbin; ${CMAKE_CXX_COMPILER} CACHE LIST "") - else() # CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) - set(RAJA_NVCC_FLAGS -g; -G; -O2; -restrict; -arch ${RAJA_CUDA_ARCH}; -std ${RAJA_NVCC_STD}; --expt-extended-lambda; -ccbin ${CMAKE_CXX_COMPILER} CACHE LIST "") - endif() + list(APPEND RAJA_EXTRA_NVCC_FLAGS -restrict; -arch ${CUDA_ARCH}; -std ${RAJA_NVCC_STD}; --expt-extended-lambda; -ccbin; ${CMAKE_CXX_COMPILER}) + + set(RAJA_NVCC_FLAGS_RELEASE -O2 CACHE STRING "") + set(RAJA_NVCC_FLAGS_DEBUG -g; -G; -O0 CACHE STRING "") + set(RAJA_NVCC_FLAGS_MINSIZEREL -Os CACHE STRING "") + set(RAJA_NVCC_FLAGS_RELWITHDEBINFO -g; -G; -O2 CACHE STRING "") if(RAJA_ENABLE_COVERAGE) if (CMAKE_CXX_COMPILER_ID MATCHES GNU) message(INFO "Coverage analysis enabled") - set(RAJA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}; -Xcompiler -coverage; -Xlinker -coverage) + set(RAJA_EXTRA_NVCC_FLAGS ${RAJA_EXTRA_NVCC_FLAGS}; -Xcompiler -coverage; -Xlinker -coverage) set(CMAKE_EXE_LINKER_FLAGS "-coverage ${CMAKE_EXE_LINKER_FLAGS}") else() message(WARNING "Code coverage specified but not enabled -- GCC was not detected") endif() endif() endif() - + set(RAJA_NVCC_FLAGS ${RAJA_EXTRA_NVCC_FLAGS} CACHE STRING "") set(CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) + 
set(CUDA_NVCC_FLAGS_RELEASE ${RAJA_NVCC_FLAGS_RELEASE}) + set(CUDA_NVCC_FLAGS_DEBUG ${RAJA_NVCC_FLAGS_DEBUG}) + set(CUDA_NVCC_FLAGS_MINSIZEREL ${RAJA_NVCC_FLAGS_MINSIZEREL}) + set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ${RAJA_NVCC_FLAGS_RELWITHDEBINFO}) endif() # end RAJA_ENABLE_CUDA section @@ -148,4 +122,3 @@ set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") set(RAJA_DATA_ALIGN 64 CACHE INT "") set(RAJA_COHERENCE_BLOCK_SIZE 64 CACHE INT "") - diff --git a/cmake/SetupPackages.cmake b/cmake/SetupPackages.cmake index 88b8ee6343..ef5be0aefa 100644 --- a/cmake/SetupPackages.cmake +++ b/cmake/SetupPackages.cmake @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. # # Produced at the Lawrence Livermore National Laboratory # @@ -9,152 +9,52 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. # ############################################################################### -if (RAJA_ENABLE_OPENMP) - find_package(OpenMP) +if (ENABLE_OPENMP) if(OPENMP_FOUND) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - list(APPEND RAJA_NVCC_FLAGS -Xcompiler ${OpenMP_CXX_FLAGS}) + list(APPEND RAJA_EXTRA_NVCC_FLAGS -Xcompiler ${OpenMP_CXX_FLAGS}) message(STATUS "OpenMP Enabled") else() message(WARNING "OpenMP NOT FOUND") - set(RAJA_ENABLE_OPENMP Off) + set(ENABLE_OPENMP Off) endif() endif() -if (RAJA_ENABLE_CLANG_CUDA) - set(RAJA_ENABLE_CUDA On) -endif () - -if (RAJA_ENABLE_CUDA) - find_package(CUDA REQUIRED) - set (CUDA_PROPAGATE_HOST_FLAGS OFF) - include_directories(${CUDA_INCLUDE_DIRS}) - - if (RAJA_ENABLE_CUB) - +if (ENABLE_CUDA) + if (ENABLE_CUB) find_package(CUB) - if (CUB_FOUND) - include_directories(${CUB_INCLUDE_DIRS}) + blt_register_library( + NAME cub + INCLUDES ${CUB_INCLUDE_DIRS}) else() message(WARNING "Using deprecated Thrust backend for CUDA scans.\n Please set CUB_DIR for better scan performance.") - set(RAJA_ENABLE_CUB False) + set(ENABLE_CUB Off) endif() endif() -endif() - +endif () -if (RAJA_ENABLE_TBB) +if (ENABLE_TBB) find_package(TBB) if(TBB_FOUND) - include_directories(${TBB_INCLUDE_DIRS}) + blt_register_library( + NAME tbb + INCLUDES ${TBB_INCLUDE_DIRS} + LIBRARIES ${TBB_LIBRARIES}) 
message(STATUS "TBB Enabled") else() message(WARNING "TBB NOT FOUND") - set(RAJA_ENABLE_TBB Off) + set(ENABLE_TBB Off) endif() endif () -if (RAJA_ENABLE_TESTS) - -# -# This conditional prevents build problems resulting from BLT and -# RAJA each having their own copy of googletest. -# -if (RAJA_BUILD_WITH_BLT) -else() - - include(ExternalProject) - # Set default ExternalProject root directory - SET_DIRECTORY_PROPERTIES(PROPERTIES EP_PREFIX ${CMAKE_BINARY_DIR}/tpl) - - ExternalProject_Add( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.7.0 - CMAKE_ARGS - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_CXX_COMPILER_ARG1=${CMAKE_CXX_COMPILER_ARG1} - INSTALL_COMMAND "" - LOG_DOWNLOAD ON - LOG_CONFIGURE ON - LOG_BUILD ON) - - ExternalProject_Get_Property(googletest source_dir) - include_directories(${source_dir}/include) - - ExternalProject_Get_Property(googletest binary_dir) - add_library(gtest UNKNOWN IMPORTED) - add_library(gtest_main UNKNOWN IMPORTED) - - if ( UNIX ) - set_target_properties(gtest PROPERTIES - IMPORTED_LOCATION ${binary_dir}/libgtest.a - ) - set_target_properties(gtest_main PROPERTIES - IMPORTED_LOCATION ${binary_dir}/libgtest_main.a - ) - elseif( WIN32 ) - set_target_properties(gtest PROPERTIES - IMPORTED_LOCATION ${binary_dir}/${CMAKE_BUILD_TYPE}/gtest.lib - ) - set_target_properties(gtest_main PROPERTIES - IMPORTED_LOCATION ${binary_dir}/${CMAKE_BUILD_TYPE}/gtest_main.lib - ) - endif () - add_dependencies(gtest googletest) - add_dependencies(gtest_main googletest) - - # GoogleTest requires threading - find_package(Threads) - - enable_testing() -endif () - -endif () - -if (RAJA_ENABLE_DOCUMENTATION) - find_package(Sphinx) - find_package(Doxygen) -endif () - -if (RAJA_ENABLE_CHAI) +if (ENABLE_CHAI) message(STATUS "CHAI enabled") - find_package(chai) - include_directories(${CHAI_INCLUDE_DIRS}) endif() diff --git a/cmake/SetupRajaConfig.cmake 
b/cmake/SetupRajaConfig.cmake index efa1bc59a7..ab9e562573 100644 --- a/cmake/SetupRajaConfig.cmake +++ b/cmake/SetupRajaConfig.cmake @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. # # Produced at the Lawrence Livermore National Laboratory # @@ -9,34 +9,7 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. 
DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. # ############################################################################### @@ -48,7 +21,7 @@ option(RAJA_USE_FLOAT Off) option(RAJA_USE_COMPLEX Off) ## Pointer options -if (RAJA_ENABLE_CUDA) +if (ENABLE_CUDA) set(RAJA_PTR "RAJA_USE_BARE_PTR") else () set(RAJA_PTR "RAJA_USE_RESTRICT_PTR") @@ -59,7 +32,7 @@ endif() #set(RAJA_USE_PTR_CLASS OFF) ## Fault tolerance options -option(RAJA_ENABLE_FT "Enable fault-tolerance features" OFF) +option(ENABLE_FT "Enable fault-tolerance features" OFF) option(RAJA_REPORT_FT "Report on use of fault-tolerant features" OFF) ## Timer options @@ -88,6 +61,15 @@ check_function_exists(posix_memalign RAJA_HAVE_POSIX_MEMALIGN) check_function_exists(aligned_alloc RAJA_HAVE_ALIGNED_ALLOC) check_function_exists(_mm_malloc RAJA_HAVE_MM_MALLOC) +# Set up RAJA_ENABLE prefixed options +set(RAJA_ENABLE_OPENMP ${ENABLE_OPENMP}) +set(RAJA_ENABLE_TARGET_OPENMP ${ENABLE_TARGET_OPENMP}) +set(RAJA_ENABLE_TBB ${ENABLE_TBB}) +set(RAJA_ENABLE_CUDA ${ENABLE_CUDA}) +set(RAJA_ENABLE_CLANG_CUDA ${ENABLE_CLANG_CUDA}) +set(RAJA_ENABLE_CHAI ${ENABLE_CHAI}) +set(RAJA_ENABLE_CUB ${ENABLE_CUB}) + # Configure a header file with all the variables we found. 
configure_file(${PROJECT_SOURCE_DIR}/include/RAJA/config.hpp.in ${PROJECT_BINARY_DIR}/include/RAJA/config.hpp) @@ -109,7 +91,7 @@ if(PKG_CONFIG_FOUND) foreach(INCDIR ${INCLUDE_DIRECTORIES} ${CUDA_INCLUDE_DIRS}) set(PC_C_FLAGS "${PC_C_FLAGS} -I${INCDIR}") endforeach() - if(RAJA_ENABLE_CUDA) + if(ENABLE_CUDA) foreach(FLAG ${RAJA_NVCC_FLAGS}) set(PC_C_FLAGS "${PC_C_FLAGS} ${FLAG}") endforeach() diff --git a/cmake/thirdparty/FindCUDA.cmake b/cmake/thirdparty/FindCUDA.cmake deleted file mode 100644 index ebfd24ab29..0000000000 --- a/cmake/thirdparty/FindCUDA.cmake +++ /dev/null @@ -1,1917 +0,0 @@ -#.rst: -# FindCUDA -# -------- -# -# Tools for building CUDA C files: libraries and build dependencies. -# -# This script locates the NVIDIA CUDA C tools. It should work on linux, -# windows, and mac and should be reasonably up to date with CUDA C -# releases. -# -# This script makes use of the standard find_package arguments of -# , REQUIRED and QUIET. CUDA_FOUND will report if an -# acceptable version of CUDA was found. -# -# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if -# the prefix cannot be determined by the location of nvcc in the system -# path and REQUIRED is specified to find_package(). To use a different -# installed version of the toolkit set the environment variable -# CUDA_BIN_PATH before running cmake (e.g. -# CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default -# /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring. If -# you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that -# depend on the path will be relocated. -# -# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain -# platforms, or to use a cuda runtime not installed in the default -# location. In newer versions of the toolkit the cuda library is -# included with the graphics driver- be sure that the driver version -# matches what is needed by the cuda runtime version. 
-# -# The following variables affect the behavior of the macros in the -# script (in alphebetical order). Note that any of these flags can be -# changed multiple times in the same directory before calling -# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX, -# CUDA_COMPILE_FATBIN, CUDA_COMPILE_CUBIN or CUDA_WRAP_SRCS:: -# -# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) -# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. -# Note that making this different from the host code when generating object -# or C files from CUDA code just won't work, because size_t gets defined by -# nvcc in the generated source. If you compile to PTX and then load the -# file yourself, you can mix bit sizes between device and host. -# -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) -# -- Set to ON if you want the custom build rule to be attached to the source -# file in Visual Studio. Turn OFF if you add the same cuda file to multiple -# targets. -# -# This allows the user to build the target from the CUDA file; however, bad -# things can happen if the CUDA source file is added to multiple targets. -# When performing parallel builds it is possible for the custom build -# command to be run more than once and in parallel causing cryptic build -# errors. VS runs the rules for every source file in the target, and a -# source can have only one rule no matter how many projects it is added to. -# When the rule is run from multiple targets race conditions can occur on -# the generated file. Eventually everything will get built, but if the user -# is unaware of this behavior, there may be confusion. It would be nice if -# this script could detect the reuse of source files across multiple targets -# and turn the option off for the user, but no good solution could be found. -# -# CUDA_BUILD_CUBIN (Default OFF) -# -- Set to ON to enable and extra compilation pass with the -cubin option in -# Device mode. 
The output is parsed and register, shared memory usage is -# printed during build. -# -# CUDA_BUILD_EMULATION (Default OFF for device mode) -# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files -# when CUDA_BUILD_EMULATION is TRUE. -# -# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) -# -- Set to the path you wish to have the generated files placed. If it is -# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. -# Intermediate files will always be placed in -# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. -# -# CUDA_HOST_COMPILATION_CPP (Default ON) -# -- Set to OFF for C compilation of host code. -# -# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS) -# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or -# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or -# CUDA_NVCC_FLAGS_ variables. For Visual Studio targets -# $(VCInstallDir)/bin is a special value that expands out to the path when -# the command is run from within VS. -# -# CUDA_NVCC_FLAGS -# CUDA_NVCC_FLAGS_ -# -- Additional NVCC command line arguments. NOTE: multiple arguments must be -# semi-colon delimited (e.g. --compiler-options;-Wall) -# -# CUDA_PROPAGATE_HOST_FLAGS (Default ON) -# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration -# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the -# host compiler through nvcc's -Xcompiler flag. This helps make the -# generated host code match the rest of the system better. Sometimes -# certain flags give nvcc problems, and this will help you turn the flag -# propagation off. This does not affect the flags supplied directly to nvcc -# via CUDA_NVCC_FLAGS or through the OPTION flags specified through -# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for -# shared library compilation are not affected by this flag. 
-# -# CUDA_SEPARABLE_COMPILATION (Default OFF) -# -- If set this will enable separable compilation for all CUDA runtime object -# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY -# (e.g. calling CUDA_WRAP_SRCS directly), -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called. -# -# CUDA_SOURCE_PROPERTY_FORMAT -# -- If this source file property is set, it can override the format specified -# to CUDA_WRAP_SRCS (OBJ, PTX, CUBIN, or FATBIN). If an input source file -# is not a .cu file, setting this file will cause it to be treated as a .cu -# file. See documentation for set_source_files_properties on how to set -# this property. -# -# CUDA_USE_STATIC_CUDA_RUNTIME (Default ON) -# -- When enabled the static version of the CUDA runtime library will be used -# in CUDA_LIBRARIES. If the version of CUDA configured doesn't support -# this option, then it will be silently disabled. -# -# CUDA_VERBOSE_BUILD (Default OFF) -# -- Set to ON to see all the commands used when building the CUDA file. When -# using a Makefile generator the value defaults to VERBOSE (run make -# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will -# always print the output. -# -# The script creates the following macros (in alphebetical order):: -# -# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) -# -- Adds the cufft library to the target (can be any target). Handles whether -# you are in emulation mode or not. -# -# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) -# -- Adds the cublas library to the target (can be any target). Handles -# whether you are in emulation mode or not. -# -# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... -# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Creates an executable "cuda_target" which is made up of the files -# specified. 
All of the non CUDA C files are compiled using the standard -# build rules specified by CMAKE and the cuda files are compiled to object -# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is -# added automatically to include_directories(). Some standard CMake target -# calls can be used on the target after calling this macro -# (e.g. set_target_properties and target_link_libraries), but setting -# properties that adjust compilation flags will not affect code compiled by -# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. -# -# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... -# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. -# -# CUDA_BUILD_CLEAN_TARGET() -# -- Creates a convience target that deletes all the dependency files -# generated. You should make clean after running this target to ensure the -# dependency files get regenerated. -# -# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] -# [OPTIONS ...] ) -# -- Returns a list of generated files from the input source files to be used -# with ADD_LIBRARY or ADD_EXECUTABLE. -# -# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of PTX files generated from the input source files. -# -# CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of FATBIN files generated from the input source files. -# -# CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of CUBIN files generated from the input source files. -# -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var -# cuda_target -# object_files ) -# -- Compute the name of the intermediate link file used for separable -# compilation. This file name is typically passed into -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. 
output_file_var is produced -# based on cuda_target the list of objects files that need separable -# compilation as specified by object_files. If the object_files list is -# empty, then output_file_var will be empty. This function is called -# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that -# this is a function and not a macro. -# -# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) -# -- Sets the directories that should be passed to nvcc -# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu -# files. -# -# -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target -# nvcc_flags object_files) -# -- Generates the link object required by separable compilation from the given -# object files. This is called automatically for CUDA_ADD_EXECUTABLE and -# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS -# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the -# nvcc_flags passed in are the same as the flags passed in via the OPTIONS -# argument. The only nvcc flag added automatically is the bitness flag as -# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function -# instead of a macro. -# -# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures]) -# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures -# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...) -# - "Auto" detects local machine GPU compute arch at runtime. -# - "Common" and "All" cover common and entire subsets of architectures -# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX -# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal -# NUM: Any number. 
Only those pairs are currently accepted by NVCC though: -# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 -# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable} -# Additionally, sets ${out_variable}_readable to the resulting numeric list -# Example: -# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell) -# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) -# -# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA -# Note that this is a function instead of a macro. -# -# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... -# [STATIC | SHARED | MODULE] [OPTIONS ...] ) -# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this -# function under the hood. -# -# Given the list of files (file0 file1 ... fileN) this macro generates -# custom commands that generate either PTX or linkable objects (use "PTX" or -# "OBJ" for the format argument to switch). Files that don't end with .cu -# or have the HEADER_FILE_ONLY property are ignored. -# -# The arguments passed in after OPTIONS are extra command line options to -# give to nvcc. You can also specify per configuration options by -# specifying the name of the configuration followed by the options. General -# options must precede configuration specific options. Not all -# configurations need to be specified, only the ones provided will be used. -# -# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" -# DEBUG -g -# RELEASE --use_fast_math -# RELWITHDEBINFO --use_fast_math;-g -# MINSIZEREL --use_fast_math -# -# For certain configurations (namely VS generating object files with -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will -# be produced for the given cuda file. This is because when you add the -# cuda file to Visual Studio it knows that this file produces an object file -# and will link in the resulting object file automatically. 
-# -# This script will also generate a separate cmake script that is used at -# build time to invoke nvcc. This is for several reasons. -# -# 1. nvcc can return negative numbers as return values which confuses -# Visual Studio into thinking that the command succeeded. The script now -# checks the error codes and produces errors when there was a problem. -# -# 2. nvcc has been known to not delete incomplete results when it -# encounters problems. This confuses build systems into thinking the -# target was generated when in fact an unusable file exists. The script -# now deletes the output files if there was an error. -# -# 3. By putting all the options that affect the build into a file and then -# make the build rule dependent on the file, the output files will be -# regenerated when the options change. -# -# This script also looks at optional arguments STATIC, SHARED, or MODULE to -# determine when to target the object compilation for a shared library. -# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in -# CUDA_ADD_LIBRARY. On some systems special flags are added for building -# objects intended for shared libraries. A preprocessor macro, -# _EXPORTS is defined when a shared library compilation is -# detected. -# -# Flags passed into add_definitions with -D or /D are passed along to nvcc. -# -# -# -# The script defines the following variables:: -# -# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. -# CUDA_VERSION_MINOR -- The minor version. -# CUDA_VERSION -# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR -# CUDA_HAS_FP16 -- Whether a short float (float16,fp16) is supported. -# -# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). -# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the -# SDK. This script will not directly support finding -# specific libraries or headers, as that isn't -# supported by NVIDIA. 
If you want to change -# libraries when the path changes see the -# FindCUDA.cmake script for an example of how to clear -# these variables. There are also examples of how to -# use the CUDA_SDK_ROOT_DIR to locate headers or -# libraries, if you so choose (at your own risk). -# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically -# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. -# CUDA_LIBRARIES -- Cuda RT library. -# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT -# implementation (alternative to: -# CUDA_ADD_CUFFT_TO_TARGET macro) -# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS -# implementation (alternative to: -# CUDA_ADD_CUBLAS_TO_TARGET macro). -# CUDA_cudart_static_LIBRARY -- Statically linkable cuda runtime library. -# Only available for CUDA version 5.5+ -# CUDA_cudadevrt_LIBRARY -- Device runtime library. -# Required for separable compilation. -# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library. -# Only available for CUDA version 4.0+. -# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. -# Only available for CUDA version 3.2+. -# CUDA_cusolver_LIBRARY -- CUDA Direct Solver library. -# Only available for CUDA version 7.0+. -# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. -# Only available for CUDA version 3.2+. -# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives lib. -# Only available for CUDA version 4.0+. -# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives lib (core). -# Only available for CUDA version 5.5+. -# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives lib (image processing). -# Only available for CUDA version 5.5+. -# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives lib (signal processing). -# Only available for CUDA version 5.5+. -# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. -# Only available for CUDA version 3.2+. 
-# Windows only. -# - -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. - -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# -############################################################################### - -# FindCUDA.cmake - -# This macro helps us find the location of helper files we will need the full path to -macro(CUDA_FIND_HELPER_FILE _name _extension) - set(_full_name "${_name}.${_extension}") - # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being - # processed. 
Using this variable, we can pull out the current path, and - # provide a way to get access to the other files we need local to here. - get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) - set(CUDA_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindCUDA/${_full_name}") - if(NOT EXISTS "${CUDA_${_name}}") - set(error_message "${_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindCUDA") - if(CUDA_FIND_REQUIRED) - message(FATAL_ERROR "${error_message}") - else() - if(NOT CUDA_FIND_QUIETLY) - message(STATUS "${error_message}") - endif() - endif() - endif() - # Set this variable as internal, so the user isn't bugged with it. - set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) -endmacro() - -##################################################################### -## CUDA_INCLUDE_NVCC_DEPENDENCIES -## - -# So we want to try and include the dependency file if it exists. If -# it doesn't exist then we need to create an empty one, so we can -# include it. - -# If it does exist, then we need to check to see if all the files it -# depends on exist. If they don't then we should clear the dependency -# file and regenerate it later. This covers the case where a header -# file has disappeared or moved. - -macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file) - set(CUDA_NVCC_DEPEND) - set(CUDA_NVCC_DEPEND_REGENERATE FALSE) - - - # Include the dependency file. Create it first if it doesn't exist . The - # INCLUDE puts a dependency that will force CMake to rerun and bring in the - # new info when it changes. DO NOT REMOVE THIS (as I did and spent a few - # hours figuring out why it didn't work. - if(NOT EXISTS ${dependency_file}) - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") - endif() - # Always include this file to force CMake to run again next - # invocation and rebuild the dependencies. 
- #message("including dependency_file = ${dependency_file}") - include(${dependency_file}) - - # Now we need to verify the existence of all the included files - # here. If they aren't there we need to just blank this variable and - # make the file regenerate again. -# if(DEFINED CUDA_NVCC_DEPEND) -# message("CUDA_NVCC_DEPEND set") -# else() -# message("CUDA_NVCC_DEPEND NOT set") -# endif() - if(CUDA_NVCC_DEPEND) - #message("CUDA_NVCC_DEPEND found") - foreach(f ${CUDA_NVCC_DEPEND}) - # message("searching for ${f}") - if(NOT EXISTS ${f}) - #message("file ${f} not found") - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) - endif() - endforeach() - else() - #message("CUDA_NVCC_DEPEND false") - # No dependencies, so regenerate the file. - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) - endif() - - #message("CUDA_NVCC_DEPEND_REGENERATE = ${CUDA_NVCC_DEPEND_REGENERATE}") - # No incoming dependencies, so we need to generate them. Make the - # output depend on the dependency file itself, which should cause the - # rule to re-run. - if(CUDA_NVCC_DEPEND_REGENERATE) - set(CUDA_NVCC_DEPEND ${dependency_file}) - #message("Generating an empty dependency_file: ${dependency_file}") - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") - endif() - -endmacro() - -############################################################################### -############################################################################### -# Setup variables' defaults -############################################################################### -############################################################################### - -# Allow the user to specify if the device code is supposed to be 32 or 64 bit. 
-if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON) -else() - set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF) -endif() -option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT}) - -# Attach the build rule to the source file in VS. This option -option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file. Enable only when the CUDA source file is added to at most one target." ON) - -# Prints out extra information about the cuda file during compilation -option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." OFF) - -# Set whether we are using emulation or device mode. -option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF) - -# Where to put the generated output. -set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files. If blank it will default to the CMAKE_CURRENT_BINARY_DIR") - -# Parse HOST_COMPILATION mode. -option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON) - -# Extra user settable flags -set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.") - -if(CMAKE_GENERATOR MATCHES "Visual Studio") - set(CUDA_HOST_COMPILER "$(VCInstallDir)bin" CACHE FILEPATH "Host side compiler used by NVCC") -else() - if(APPLE - AND "${CMAKE_C_COMPILER_ID}" MATCHES "Clang" - AND "${CMAKE_C_COMPILER}" MATCHES "/cc$") - # Using cc which is symlink to clang may let NVCC think it is GCC and issue - # unhandled -dumpspecs option to clang. Also in case neither - # CMAKE_C_COMPILER is defined (project does not use C language) nor - # CUDA_HOST_COMPILER is specified manually we should skip -ccbin and let - # nvcc use its own default C compiler. 
- # Only care about this on APPLE with clang to avoid - # following symlinks to things like ccache - if(DEFINED CMAKE_C_COMPILER AND NOT DEFINED CUDA_HOST_COMPILER) - get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH) - # if the real path does not end up being clang then - # go back to using CMAKE_C_COMPILER - if(NOT "${c_compiler_realpath}" MATCHES "/clang$") - set(c_compiler_realpath "${CMAKE_C_COMPILER}") - endif() - else() - set(c_compiler_realpath "") - endif() - set(CUDA_HOST_COMPILER "${c_compiler_realpath}" CACHE FILEPATH "Host side compiler used by NVCC") - else() - set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}" - CACHE FILEPATH "Host side compiler used by NVCC") - endif() -endif() - -# Propagate the host flags to the host compiler via -Xcompiler -option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" ON) - -# Enable CUDA_SEPARABLE_COMPILATION -option(CUDA_SEPARABLE_COMPILATION "Compile CUDA objects with separable compilation enabled. Requires CUDA 5.0+" OFF) - -# Specifies whether the commands used when compiling the .cu file will be printed out. -option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) - -mark_as_advanced( - CUDA_64_BIT_DEVICE_CODE - CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE - CUDA_GENERATED_OUTPUT_DIR - CUDA_HOST_COMPILATION_CPP - CUDA_NVCC_FLAGS - CUDA_PROPAGATE_HOST_FLAGS - CUDA_BUILD_CUBIN - CUDA_BUILD_EMULATION - CUDA_VERBOSE_BUILD - CUDA_SEPARABLE_COMPILATION - ) - -# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we -# need to add another entry for the CMAKE_BUILD_TYPE. We also need to add the -# standerd set of 4 build types (Debug, MinSizeRel, Release, and RelWithDebInfo) -# for completeness. 
We need run this loop in order to accomodate the addition -# of extra configuration types. Duplicate entries will be removed by -# REMOVE_DUPLICATES. -set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) -list(REMOVE_DUPLICATES CUDA_configuration_types) -foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.") - mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper}) -endforeach() - -############################################################################### -############################################################################### -# Locate CUDA, Set Build Type, etc. -############################################################################### -############################################################################### - -macro(cuda_unset_include_and_libraries) - unset(CUDA_TOOLKIT_INCLUDE CACHE) - unset(CUDA_CUDART_LIBRARY CACHE) - unset(CUDA_CUDA_LIBRARY CACHE) - # Make sure you run this before you unset CUDA_VERSION. 
- if(CUDA_VERSION VERSION_EQUAL "3.0") - # This only existed in the 3.0 version of the CUDA toolkit - unset(CUDA_CUDARTEMU_LIBRARY CACHE) - endif() - unset(CUDA_cudart_static_LIBRARY CACHE) - unset(CUDA_cudadevrt_LIBRARY CACHE) - unset(CUDA_cublas_LIBRARY CACHE) - unset(CUDA_cublas_device_LIBRARY CACHE) - unset(CUDA_cublasemu_LIBRARY CACHE) - unset(CUDA_cufft_LIBRARY CACHE) - unset(CUDA_cufftemu_LIBRARY CACHE) - unset(CUDA_cupti_LIBRARY CACHE) - unset(CUDA_curand_LIBRARY CACHE) - unset(CUDA_cusolver_LIBRARY CACHE) - unset(CUDA_cusparse_LIBRARY CACHE) - unset(CUDA_npp_LIBRARY CACHE) - unset(CUDA_nppc_LIBRARY CACHE) - unset(CUDA_nppi_LIBRARY CACHE) - unset(CUDA_npps_LIBRARY CACHE) - unset(CUDA_nvcuvenc_LIBRARY CACHE) - unset(CUDA_nvcuvid_LIBRARY CACHE) - unset(CUDA_USE_STATIC_CUDA_RUNTIME CACHE) - unset(CUDA_GPU_DETECT_OUTPUT CACHE) -endmacro() - -# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, -# if they have then clear the cache variables, so that will be detected again. -if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") - unset(CUDA_TOOLKIT_TARGET_DIR CACHE) - unset(CUDA_NVCC_EXECUTABLE CACHE) - cuda_unset_include_and_libraries() - unset(CUDA_VERSION CACHE) -endif() - -if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}") - cuda_unset_include_and_libraries() -endif() - -# -# End of unset() -# - -# -# Start looking for things -# - -# Search for the cuda distribution. -if(NOT CUDA_TOOLKIT_ROOT_DIR AND NOT CMAKE_CROSSCOMPILING) - # Search in the CUDA_BIN_PATH first. - find_path(CUDA_TOOLKIT_ROOT_DIR - NAMES nvcc nvcc.exe - PATHS - ENV CUDA_TOOLKIT_ROOT - ENV CUDA_PATH - ENV CUDA_BIN_PATH - PATH_SUFFIXES bin bin64 - DOC "Toolkit location." - NO_DEFAULT_PATH - ) - - # Now search default paths - find_path(CUDA_TOOLKIT_ROOT_DIR - NAMES nvcc nvcc.exe - PATHS /opt/cuda/bin - /usr/local/bin - /usr/local/cuda/bin - DOC "Toolkit location." 
- ) - - if (CUDA_TOOLKIT_ROOT_DIR) - string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR}) - # We need to force this back into the cache. - set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE) - set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR}) - endif() - - if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) - if(CUDA_FIND_REQUIRED) - message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR") - elseif(NOT CUDA_FIND_QUIETLY) - message("CUDA_TOOLKIT_ROOT_DIR not found or specified") - endif() - endif () -endif () - -if(CMAKE_CROSSCOMPILING) - SET (CUDA_TOOLKIT_ROOT $ENV{CUDA_TOOLKIT_ROOT}) - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") - # Support for NVPACK - set (CUDA_TOOLKIT_TARGET_NAME "armv7-linux-androideabi") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") - # Support for arm cross compilation - set(CUDA_TOOLKIT_TARGET_NAME "armv7-linux-gnueabihf") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - # Support for aarch64 cross compilation - if (ANDROID_ARCH_NAME STREQUAL "arm64") - set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux-androideabi") - else() - set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux") - endif (ANDROID_ARCH_NAME STREQUAL "arm64") - endif() - - if (EXISTS "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}") - set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}" CACHE PATH "CUDA Toolkit target location.") - SET (CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT}) - mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR) - endif() - - # add known CUDA targetr root path to the set of directories we search for programs, libraries and headers - set( CMAKE_FIND_ROOT_PATH "${CUDA_TOOLKIT_TARGET_DIR};${CMAKE_FIND_ROOT_PATH}") - macro( cuda_find_host_program ) - find_host_program( ${ARGN} ) - endmacro() -else() - # for non-cross-compile, find_host_program == find_program and CUDA_TOOLKIT_TARGET_DIR == CUDA_TOOLKIT_ROOT_DIR - macro( cuda_find_host_program ) - find_program( 
${ARGN} ) - endmacro() - SET (CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR}) -endif() - - -# CUDA_NVCC_EXECUTABLE -cuda_find_host_program(CUDA_NVCC_EXECUTABLE - NAMES nvcc - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" - ENV CUDA_PATH - ENV CUDA_BIN_PATH - PATH_SUFFIXES bin bin64 - NO_DEFAULT_PATH - ) -# Search default search paths, after we search our own set of paths. -cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc) -mark_as_advanced(CUDA_NVCC_EXECUTABLE) - -if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) - # Compute the version. - execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") - mark_as_advanced(CUDA_VERSION) -else() - # Need to set these based off of the cached value - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}") - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}") -endif() - - -# Always set this convenience variable -set(CUDA_VERSION_STRING "${CUDA_VERSION}") - -# CUDA_TOOLKIT_INCLUDE -find_path(CUDA_TOOLKIT_INCLUDE - device_functions.h # Header included in toolkit - PATHS ${CUDA_TOOLKIT_TARGET_DIR} - ENV CUDA_PATH - ENV CUDA_INC_PATH - PATH_SUFFIXES include - NO_DEFAULT_PATH - ) -# Search default search paths, after we search our own set of paths. -find_path(CUDA_TOOLKIT_INCLUDE device_functions.h) -mark_as_advanced(CUDA_TOOLKIT_INCLUDE) - -if (CUDA_VERSION VERSION_GREATER "7.0" OR EXISTS "${CUDA_TOOLKIT_INCLUDE}/cuda_fp16.h") - set(CUDA_HAS_FP16 TRUE) -else() - set(CUDA_HAS_FP16 FALSE) -endif() - -# Set the user list of include dir to nothing to initialize it. 
-set (CUDA_NVCC_INCLUDE_DIRS_USER "") -set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) - -macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext ) - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - # CUDA 3.2+ on Windows moved the library directories, so we need the new - # and old paths. - set(_cuda_64bit_lib_dir "${_path_ext}lib/x64" "${_path_ext}lib64" "${_path_ext}libx64" ) - endif() - # CUDA 3.2+ on Windows moved the library directories, so we need to new - # (lib/Win32) and the old path (lib). - find_library(${_var} - NAMES ${_names} - PATHS "${CUDA_TOOLKIT_TARGET_DIR}" - ENV CUDA_PATH - ENV CUDA_LIB_PATH - PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32" - DOC ${_doc} - NO_DEFAULT_PATH - ) - if (NOT CMAKE_CROSSCOMPILING) - # Search default search paths, after we search our own set of paths. - find_library(${_var} - NAMES ${_names} - PATHS "/usr/lib/nvidia-current" - DOC ${_doc} - ) - endif() -endmacro() - -macro(cuda_find_library_local_first _var _names _doc) - cuda_find_library_local_first_with_path_ext( "${_var}" "${_names}" "${_doc}" "" ) -endmacro() - -macro(find_library_local_first _var _names _doc ) - cuda_find_library_local_first( "${_var}" "${_names}" "${_doc}" "" ) -endmacro() - - -# CUDA_LIBRARIES -cuda_find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library") -if(CUDA_VERSION VERSION_EQUAL "3.0") - # The cudartemu library only existed for the 3.0 version of CUDA. - cuda_find_library_local_first(CUDA_CUDARTEMU_LIBRARY cudartemu "\"cudartemu\" library") - mark_as_advanced( - CUDA_CUDARTEMU_LIBRARY - ) -endif() - -if(NOT CUDA_VERSION VERSION_LESS "5.5") - cuda_find_library_local_first(CUDA_cudart_static_LIBRARY cudart_static "static CUDA runtime library") - mark_as_advanced(CUDA_cudart_static_LIBRARY) -endif() - - -if(CUDA_cudart_static_LIBRARY) - # If static cudart available, use it by default, but provide a user-visible option to disable it. 
- option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON) - set(CUDA_CUDART_LIBRARY_VAR CUDA_cudart_static_LIBRARY) -else() - # If not available, silently disable the option. - set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "") - set(CUDA_CUDART_LIBRARY_VAR CUDA_CUDART_LIBRARY) -endif() -if(NOT CUDA_VERSION VERSION_LESS "5.0") - cuda_find_library_local_first(CUDA_cudadevrt_LIBRARY cudadevrt "\"cudadevrt\" library") - mark_as_advanced(CUDA_cudadevrt_LIBRARY) -endif() - -if(CUDA_USE_STATIC_CUDA_RUNTIME) - if(UNIX) - # Check for the dependent libraries. Here we look for pthreads. - if (DEFINED CMAKE_THREAD_PREFER_PTHREAD) - set(_cuda_cmake_thread_prefer_pthread ${CMAKE_THREAD_PREFER_PTHREAD}) - endif() - set(CMAKE_THREAD_PREFER_PTHREAD 1) - - # Many of the FindXYZ CMake comes with makes use of try_compile with int main(){return 0;} - # as the source file. Unfortunately this causes a warning with -Wstrict-prototypes and - # -Werror causes the try_compile to fail. We will just temporarily disable other flags - # when doing the find_package command here. - set(_cuda_cmake_c_flags ${CMAKE_C_FLAGS}) - set(CMAKE_C_FLAGS "-fPIC") - find_package(Threads REQUIRED) - set(CMAKE_C_FLAGS ${_cuda_cmake_c_flags}) - - if (DEFINED _cuda_cmake_thread_prefer_pthread) - set(CMAKE_THREAD_PREFER_PTHREAD ${_cuda_cmake_thread_prefer_pthread}) - unset(_cuda_cmake_thread_prefer_pthread) - else() - unset(CMAKE_THREAD_PREFER_PTHREAD) - endif() - - if(NOT APPLE) - #On Linux, you must link against librt when using the static cuda runtime. 
- find_library(CUDA_rt_LIBRARY rt) - if (NOT CUDA_rt_LIBRARY) - message(WARNING "Expecting to find librt for libcudart_static, but didn't find it.") - endif() - endif() - endif() -endif() - -# CUPTI library showed up in cuda toolkit 4.0 -if(NOT CUDA_VERSION VERSION_LESS "4.0") - cuda_find_library_local_first_with_path_ext(CUDA_cupti_LIBRARY cupti "\"cupti\" library" "extras/CUPTI/") - mark_as_advanced(CUDA_cupti_LIBRARY) -endif() - -# Set the CUDA_LIBRARIES variable. This is the set of stuff to link against if you are -# using the CUDA runtime. For the dynamic version of the runtime, most of the -# dependencies are brough in, but for the static version there are additional libraries -# and linker commands needed. -# Initialize to empty -set(CUDA_LIBRARIES) - -# If we are using emulation mode and we found the cudartemu library then use -# that one instead of cudart. -if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) - list(APPEND CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY}) -elseif(CUDA_USE_STATIC_CUDA_RUNTIME AND CUDA_cudart_static_LIBRARY) - list(APPEND CUDA_LIBRARIES ${CUDA_cudart_static_LIBRARY} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) - if (CUDA_rt_LIBRARY) - list(APPEND CUDA_LIBRARIES ${CUDA_rt_LIBRARY}) - endif() - if(APPLE) - # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that - # the static cuda runtime can find it at runtime. - list(APPEND CUDA_LIBRARIES -Wl,-rpath,/usr/local/cuda/lib) - endif() -else() - list(APPEND CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) -endif() - -# 1.1 toolkit on linux doesn't appear to have a separate library on -# some platforms. 
-cuda_find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).") - -mark_as_advanced( - CUDA_CUDA_LIBRARY - CUDA_CUDART_LIBRARY - ) - -####################### -# Look for some of the toolkit helper libraries -macro(FIND_CUDA_HELPER_LIBS _name) - cuda_find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library") - mark_as_advanced(CUDA_${_name}_LIBRARY) -endmacro() - -####################### -# Disable emulation for v3.1 onward -if(CUDA_VERSION VERSION_GREATER "3.0") - if(CUDA_BUILD_EMULATION) - message(FATAL_ERROR "CUDA_BUILD_EMULATION is not supported in version 3.1 and onwards. You must disable it to proceed. You have version ${CUDA_VERSION}.") - endif() -endif() - -# Search for additional CUDA toolkit libraries. -if(CUDA_VERSION VERSION_LESS "3.1") - # Emulation libraries aren't available in version 3.1 onward. - find_cuda_helper_libs(cufftemu) - find_cuda_helper_libs(cublasemu) -endif() -find_cuda_helper_libs(cufft) -find_cuda_helper_libs(cublas) -if(NOT CUDA_VERSION VERSION_LESS "3.2") - # cusparse showed up in version 3.2 - find_cuda_helper_libs(cusparse) - find_cuda_helper_libs(curand) - if (WIN32) - find_cuda_helper_libs(nvcuvenc) - find_cuda_helper_libs(nvcuvid) - endif() -endif() -if(CUDA_VERSION VERSION_GREATER "5.0") - find_cuda_helper_libs(cublas_device) - # In CUDA 5.5 NPP was splitted onto 3 separate libraries. 
- find_cuda_helper_libs(nppc) - find_cuda_helper_libs(nppi) - find_cuda_helper_libs(npps) - set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}") -elseif(NOT CUDA_VERSION VERSION_LESS "4.0") - find_cuda_helper_libs(npp) -endif() -if(NOT CUDA_VERSION VERSION_LESS "7.0") - # cusolver showed up in version 7.0 - find_cuda_helper_libs(cusolver) -endif() - -if (CUDA_BUILD_EMULATION) - set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY}) -else() - set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY}) -endif() - -######################## -# Look for the SDK stuff. As of CUDA 3.0 NVSDKCUDA_ROOT has been replaced with -# NVSDKCOMPUTE_ROOT with the old CUDA C contents moved into the C subdirectory -find_path(CUDA_SDK_ROOT_DIR common/inc/cutil.h - HINTS - "$ENV{NVSDKCOMPUTE_ROOT}/C" - ENV NVSDKCUDA_ROOT - "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" - PATHS - "/Developer/GPU\ Computing/C" - ) - -# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the -# environment variables. -set(CUDA_SDK_SEARCH_PATH - "${CUDA_SDK_ROOT_DIR}" - "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" - "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2" - "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK" - "$ENV{HOME}/NVIDIA_CUDA_SDK" - "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX" - "/Developer/CUDA" - ) - -# Example of how to find an include file from the CUDA_SDK_ROOT_DIR - -# find_path(CUDA_CUT_INCLUDE_DIR -# cutil.h -# PATHS ${CUDA_SDK_SEARCH_PATH} -# PATH_SUFFIXES "common/inc" -# DOC "Location of cutil.h" -# NO_DEFAULT_PATH -# ) -# # Now search system paths -# find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h") - -# mark_as_advanced(CUDA_CUT_INCLUDE_DIR) - - -# Example of how to find a library in the CUDA_SDK_ROOT_DIR - -# # cutil library is called cutil64 for 64 bit builds on windows. 
We don't want -# # to get these confused, so we are setting the name based on the word size of -# # the build. - -# if(CMAKE_SIZEOF_VOID_P EQUAL 8) -# set(cuda_cutil_name cutil64) -# else() -# set(cuda_cutil_name cutil32) -# endif() - -# find_library(CUDA_CUT_LIBRARY -# NAMES cutil ${cuda_cutil_name} -# PATHS ${CUDA_SDK_SEARCH_PATH} -# # The new version of the sdk shows up in common/lib, but the old one is in lib -# PATH_SUFFIXES "common/lib" "lib" -# DOC "Location of cutil library" -# NO_DEFAULT_PATH -# ) -# # Now search system paths -# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library") -# mark_as_advanced(CUDA_CUT_LIBRARY) -# set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY}) - - - -############################# -# Check for required components -set(CUDA_FOUND TRUE) - -set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE) -set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE) -set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." 
FORCE) - -#include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) - -find_package_handle_standard_args(CUDA - REQUIRED_VARS - CUDA_TOOLKIT_ROOT_DIR - CUDA_NVCC_EXECUTABLE - CUDA_INCLUDE_DIRS - ${CUDA_CUDART_LIBRARY_VAR} - VERSION_VAR - CUDA_VERSION - ) - - - -############################################################################### -############################################################################### -# Macros -############################################################################### -############################################################################### - -############################################################################### -# Add include directories to pass to the nvcc command. -macro(CUDA_INCLUDE_DIRECTORIES) - foreach(dir ${ARGN}) - list(APPEND CUDA_NVCC_INCLUDE_DIRS_USER ${dir}) - endforeach() -endmacro() - - -############################################################################## -cuda_find_helper_file(parse_cubin cmake) -cuda_find_helper_file(make2cmake cmake) -cuda_find_helper_file(run_nvcc cmake) -include("${CMAKE_CURRENT_LIST_DIR}/FindCUDA/select_compute_arch.cmake") - -############################################################################## -# Separate the OPTIONS out from the sources -# -macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options) - set( ${_sources} ) - set( ${_cmake_options} ) - set( ${_options} ) - set( _found_options FALSE ) - foreach(arg ${ARGN}) - if("x${arg}" STREQUAL "xOPTIONS") - set( _found_options TRUE ) - elseif( - "x${arg}" STREQUAL "xWIN32" OR - "x${arg}" STREQUAL "xMACOSX_BUNDLE" OR - "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR - "x${arg}" STREQUAL "xSTATIC" OR - "x${arg}" STREQUAL "xSHARED" OR - "x${arg}" STREQUAL "xMODULE" - ) - list(APPEND ${_cmake_options} ${arg}) - else() - if ( _found_options ) - list(APPEND ${_options} ${arg}) - else() - # Assume this is a file - list(APPEND ${_sources} ${arg}) - endif() - endif() - endforeach() 
-endmacro() - -############################################################################## -# Parse the OPTIONS from ARGN and set the variables prefixed by _option_prefix -# -macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix) - set( _found_config ) - foreach(arg ${ARGN}) - # Determine if we are dealing with a perconfiguration flag - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - if (arg STREQUAL "${config_upper}") - set( _found_config _${arg}) - # Set arg to nothing to keep it from being processed further - set( arg ) - endif() - endforeach() - - if ( arg ) - list(APPEND ${_option_prefix}${_found_config} "${arg}") - endif() - endforeach() -endmacro() - -############################################################################## -# Helper to add the include directory for CUDA only once -function(CUDA_ADD_CUDA_INCLUDE_ONCE) - get_directory_property(_include_directories INCLUDE_DIRECTORIES) - set(_add TRUE) - if(_include_directories) - foreach(dir ${_include_directories}) - if("${dir}" STREQUAL "${CUDA_INCLUDE_DIRS}") - set(_add FALSE) - endif() - endforeach() - endif() - if(_add) - include_directories(${CUDA_INCLUDE_DIRS}) - endif() -endfunction() - -function(CUDA_BUILD_SHARED_LIBRARY shared_flag) - set(cmake_args ${ARGN}) - # If SHARED, MODULE, or STATIC aren't already in the list of arguments, then - # add SHARED or STATIC based on the value of BUILD_SHARED_LIBS. 
- list(FIND cmake_args SHARED _cuda_found_SHARED) - list(FIND cmake_args MODULE _cuda_found_MODULE) - list(FIND cmake_args STATIC _cuda_found_STATIC) - if( _cuda_found_SHARED GREATER -1 OR - _cuda_found_MODULE GREATER -1 OR - _cuda_found_STATIC GREATER -1) - set(_cuda_build_shared_libs) - else() - if (BUILD_SHARED_LIBS) - set(_cuda_build_shared_libs SHARED) - else() - set(_cuda_build_shared_libs STATIC) - endif() - endif() - set(${shared_flag} ${_cuda_build_shared_libs} PARENT_SCOPE) -endfunction() - -############################################################################## -# Helper to avoid clashes of files with the same basename but different paths. -# This doesn't attempt to do exactly what CMake internals do, which is to only -# add this path when there is a conflict, since by the time a second collision -# in names is detected it's already too late to fix the first one. For -# consistency sake the relative path will be added to all files. -function(CUDA_COMPUTE_BUILD_PATH path build_path) - #message("CUDA_COMPUTE_BUILD_PATH([${path}] ${build_path})") - # Only deal with CMake style paths from here on out - file(TO_CMAKE_PATH "${path}" bpath) - if (IS_ABSOLUTE "${bpath}") - # Absolute paths are generally unnessary, especially if something like - # file(GLOB_RECURSE) is used to pick up the files. - - string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos) - if (_binary_dir_pos EQUAL 0) - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}") - else() - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") - endif() - endif() - - # This recipe is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the - # CMake source. 
- - # Remove leading / - string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") - # Avoid absolute paths by removing ':' - string(REPLACE ":" "_" bpath "${bpath}") - # Avoid relative paths that go up the tree - string(REPLACE "../" "__/" bpath "${bpath}") - # Avoid spaces - string(REPLACE " " "_" bpath "${bpath}") - - # Strip off the filename. I wait until here to do it, since removin the - # basename can make a path that looked like path/../basename turn into - # path/.. (notice the trailing slash). - get_filename_component(bpath "${bpath}" PATH) - - set(${build_path} "${bpath}" PARENT_SCOPE) - #message("${build_path} = ${bpath}") -endfunction() - -############################################################################## -# This helper macro populates the following variables and setups up custom -# commands and targets to invoke the nvcc compiler to generate C or PTX source -# dependent upon the format parameter. The compiler is invoked once with -M -# to generate a dependency file and a second time with -cuda or -ptx to generate -# a .cpp or .ptx file. -# INPUT: -# cuda_target - Target name -# format - PTX, CUBIN, FATBIN or OBJ -# FILE1 .. FILEN - The remaining arguments are the sources to be wrapped. -# OPTIONS - Extra options to NVCC -# OUTPUT: -# generated_files - List of generated files -############################################################################## -############################################################################## - -macro(CUDA_WRAP_SRCS cuda_target format generated_files) - - # Put optional arguments in list. - set(_argn_list "${ARGN}") - # If one of the given optional arguments is "PHONY", make a note of it, then - # remove it from the list. 
- list(FIND _argn_list "PHONY" _phony_idx) - if("${_phony_idx}" GREATER "-1") - set(_target_is_phony true) - list(REMOVE_AT _argn_list ${_phony_idx}) - else() - set(_target_is_phony false) - endif() - - # If CMake doesn't support separable compilation, complain - if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1") - message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1") - endif() - - # Set up all the command line flags here, so that they can be overridden on a per target basis. - - set(nvcc_flags "") - - # Emulation if the card isn't present. - if (CUDA_BUILD_EMULATION) - # Emulation. - set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g) - else() - # Device mode. No flags necessary. - endif() - - if(CUDA_HOST_COMPILATION_CPP) - set(CUDA_C_OR_CXX CXX) - else() - if(CUDA_VERSION VERSION_LESS "3.0") - set(nvcc_flags ${nvcc_flags} --host-compilation C) - else() - message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0. Removing --host-compilation C flag" ) - endif() - set(CUDA_C_OR_CXX C) - endif() - - set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}) - - if(CUDA_64_BIT_DEVICE_CODE) - set(nvcc_flags ${nvcc_flags} -m64) - else() - set(nvcc_flags ${nvcc_flags} -m32) - endif() - - if(CUDA_TARGET_CPU_ARCH) - set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}") - endif() - - # This needs to be passed in at this stage, because VS needs to fill out the - # value of VCInstallDir from within VS. Note that CCBIN is only used if - # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches - # $(VCInstallDir)/bin. - if(CMAKE_GENERATOR MATCHES "Visual Studio") - set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" ) - else() - set(ccbin_flags) - endif() - - # Figure out which configure we will use and pass that in as an argument to - # the script. 
We need to defer the decision until compilation time, because - # for VS projects we won't know if we are making a debug or release build - # until build time. - if(CMAKE_GENERATOR MATCHES "Visual Studio") - set( CUDA_build_configuration "$(ConfigurationName)" ) - else() - set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}") - endif() - - # Initialize our list of includes with the user ones followed by the CUDA system ones. - set(CUDA_NVCC_INCLUDE_DIRS ${CUDA_NVCC_INCLUDE_DIRS_USER} "${CUDA_INCLUDE_DIRS}") - if(_target_is_phony) - # If the passed in target name isn't a real target (i.e., this is from a call to one of the - # cuda_compile_* functions), need to query directory properties to get include directories - # and compile definitions. - get_directory_property(_dir_include_dirs INCLUDE_DIRECTORIES) - get_directory_property(_dir_compile_defs COMPILE_DEFINITIONS) - - list(APPEND CUDA_NVCC_INCLUDE_DIRS "${_dir_include_dirs}") - set(CUDA_NVCC_COMPILE_DEFINITIONS "${_dir_compile_defs}") - else() - # Append the include directories for this target via generator expression, which is - # expanded by the FILE(GENERATE) call below. This generator expression captures all - # include dirs set by the user, whether via directory properties or target properties - list(APPEND CUDA_NVCC_INCLUDE_DIRS "$") - - # Do the same thing with compile definitions - set(CUDA_NVCC_COMPILE_DEFINITIONS "$") - endif() - - - # Reset these variables - set(CUDA_WRAP_OPTION_NVCC_FLAGS) - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}) - endforeach() - - CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${_argn_list}) - CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options}) - - # Figure out if we are building a shared library. BUILD_SHARED_LIBS is - # respected in CUDA_ADD_LIBRARY. 
- set(_cuda_build_shared_libs FALSE) - # SHARED, MODULE - list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED) - list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE) - if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1) - set(_cuda_build_shared_libs TRUE) - endif() - # STATIC - list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC) - if(_cuda_found_STATIC GREATER -1) - set(_cuda_build_shared_libs FALSE) - endif() - - # CUDA_HOST_FLAGS - if(_cuda_build_shared_libs) - # If we are setting up code for a shared library, then we need to add extra flags for - # compiling objects for shared libraries. - set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS}) - else() - set(CUDA_HOST_SHARED_FLAGS) - endif() - # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags. We - # always need to set the SHARED_FLAGS, though. - if(CUDA_PROPAGATE_HOST_FLAGS) - set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})") - else() - set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})") - endif() - - set(_cuda_nvcc_flags_config "# Build specific configuration flags") - # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - # CMAKE_FLAGS are strings and not lists. By not putting quotes around CMAKE_FLAGS - # we convert the strings to lists (like we want). 
- - if(CUDA_PROPAGATE_HOST_FLAGS) - # nvcc chokes on -g3 in versions previous to 3.0, so replace it with -g - set(_cuda_fix_g3 FALSE) - - if(CMAKE_COMPILER_IS_GNUCC) - if (CUDA_VERSION VERSION_LESS "3.0" OR - CUDA_VERSION VERSION_EQUAL "4.1" OR - CUDA_VERSION VERSION_EQUAL "4.2" - ) - set(_cuda_fix_g3 TRUE) - endif() - endif() - if(_cuda_fix_g3) - string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - else() - set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - endif() - - string(APPEND _cuda_host_flags "\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})") - endif() - - # Note that if we ever want CUDA_NVCC_FLAGS_ to be string (instead of a list - # like it is currently), we can remove the quotes around the - # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_ variable. - string(APPEND _cuda_nvcc_flags_config "\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})") - endforeach() - - # Process the C++11 flag. If the host sets the flag, we need to add it to nvcc and - # remove it from the host. This is because -Xcompile -std=c++ will choke nvcc (it uses - # the C preprocessor). In order to get this to work correctly, we need to use nvcc's - # specific c++11 flag. - if( "${_cuda_host_flags}" MATCHES "-std=c\\+\\+11") - # Add the c++11 flag to nvcc if it isn't already present. Note that we only look at - # the main flag instead of the configuration specific flags. - if( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std;c\\+\\+11" ) - list(APPEND nvcc_flags --std c++11) - endif() - string(REGEX REPLACE "[-]+std=c\\+\\+11" "" _cuda_host_flags "${_cuda_host_flags}") - endif() - - if(_cuda_build_shared_libs) - list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS") - endif() - - # Reset the output variable - set(_cuda_wrap_generated_files "") - - # Iterate over the macro arguments and create custom - # commands for all the .cu files. 
- foreach(file ${_argn_list}) - # Ignore any file marked as a HEADER_FILE_ONLY - get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) - # Allow per source file overrides of the format. Also allows compiling non-.cu files. - get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT) - if((${file} MATCHES "\\.cu$" OR _cuda_source_format) AND NOT _is_header) - - if(NOT _cuda_source_format) - set(_cuda_source_format ${format}) - endif() - # If file isn't a .cu file, we need to tell nvcc to treat it as such. - if(NOT ${file} MATCHES "\\.cu$") - set(cuda_language_flag -x=cu) - else() - set(cuda_language_flag) - endif() - - if( ${_cuda_source_format} MATCHES "OBJ") - set( cuda_compile_to_external_module OFF ) - else() - set( cuda_compile_to_external_module ON ) - if( ${_cuda_source_format} MATCHES "PTX" ) - set( cuda_compile_to_external_module_type "ptx" ) - elseif( ${_cuda_source_format} MATCHES "CUBIN") - set( cuda_compile_to_external_module_type "cubin" ) - elseif( ${_cuda_source_format} MATCHES "FATBIN") - set( cuda_compile_to_external_module_type "fatbin" ) - else() - message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS or set with CUDA_SOURCE_PROPERTY_FORMAT file property for file '${file}': '${_cuda_source_format}'. Use OBJ, PTX, CUBIN or FATBIN.") - endif() - endif() - - if(cuda_compile_to_external_module) - # Don't use any of the host compilation flags for PTX targets. 
- set(CUDA_HOST_FLAGS) - set(CUDA_NVCC_FLAGS_CONFIG) - else() - set(CUDA_HOST_FLAGS ${_cuda_host_flags}) - set(CUDA_NVCC_FLAGS_CONFIG ${_cuda_nvcc_flags_config}) - endif() - - # Determine output directory - cuda_compute_build_path("${file}" cuda_build_path) - set(cuda_compile_intermediate_directory "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${cuda_build_path}") - if(CUDA_GENERATED_OUTPUT_DIR) - set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}") - else() - if ( cuda_compile_to_external_module ) - set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}") - else() - set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}") - endif() - endif() - - # Add a custom target to generate a c or ptx file. ###################### - - get_filename_component( basename ${file} NAME ) - if( cuda_compile_to_external_module ) - set(generated_file_path "${cuda_compile_output_dir}") - set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}") - set(format_flag "-${cuda_compile_to_external_module_type}") - file(MAKE_DIRECTORY "${cuda_compile_output_dir}") - else() - set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}") - set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}") - if(CUDA_SEPARABLE_COMPILATION) - set(format_flag "-dc") - else() - set(format_flag "-c") - endif() - endif() - - # Set all of our file names. Make sure that whatever filenames that have - # generated_file_path in them get passed in through as a command line - # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time - # instead of configure time. 
- set(generated_file "${generated_file_path}/${generated_file_basename}") - set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend") - set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend") - set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt") - set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen") - set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$>:.$>.cmake") - - # Setup properties for obj files: - if( NOT cuda_compile_to_external_module ) - set_source_files_properties("${generated_file}" - PROPERTIES - EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked. - ) - endif() - - # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path. - get_filename_component(file_path "${file}" PATH) - if(IS_ABSOLUTE "${file_path}") - set(source_file "${file}") - else() - set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") - endif() - - if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION) - list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}") - endif() - - # Bring in the dependencies. 
Creates a variable CUDA_NVCC_DEPEND ####### - cuda_include_nvcc_dependencies(${cmake_dependency_file}) - - # Convience string for output ########################################### - if(CUDA_BUILD_EMULATION) - set(cuda_build_type "Emulation") - else() - set(cuda_build_type "Device") - endif() - - # Build the NVCC made dependency file ################################### - set(build_cubin OFF) - if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) - if ( NOT cuda_compile_to_external_module ) - set ( build_cubin ON ) - endif() - endif() - - # Configure the build script - configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY) - file(GENERATE - OUTPUT "${custom_target_script}" - INPUT "${custom_target_script_pregen}" - ) - - # So if a user specifies the same cuda file as input more than once, you - # can have bad things happen with dependencies. Here we check an option - # to see if this is the behavior they want. - if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE) - set(main_dep MAIN_DEPENDENCY ${source_file}) - else() - set(main_dep DEPENDS ${source_file}) - endif() - - if(CUDA_VERBOSE_BUILD) - set(verbose_output ON) - elseif(CMAKE_GENERATOR MATCHES "Makefiles") - set(verbose_output "$(VERBOSE)") - else() - set(verbose_output OFF) - endif() - - # Create up the comment string - file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - if(cuda_compile_to_external_module) - set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}") - else() - set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}") - endif() - - set(_verbatim VERBATIM) - if(ccbin_flags MATCHES "\\$\\(VCInstallDir\\)") - set(_verbatim "") - endif() - - # Build the generated file and dependency file ########################## - add_custom_command( - OUTPUT ${generated_file} - # These output files depend on the source_file and the contents of 
cmake_dependency_file - ${main_dep} - DEPENDS ${CUDA_NVCC_DEPEND} - DEPENDS ${custom_target_script} - # Make sure the output directory exists before trying to write to it. - COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" - COMMAND ${CMAKE_COMMAND} ARGS - -D verbose:BOOL=${verbose_output} - ${ccbin_flags} - -D build_configuration:STRING=${CUDA_build_configuration} - -D "generated_file:STRING=${generated_file}" - -D "generated_cubin_file:STRING=${generated_cubin_file}" - -P "${custom_target_script}" - WORKING_DIRECTORY "${cuda_compile_intermediate_directory}" - COMMENT "${cuda_build_comment_string}" - ${_verbatim} - ) - - # Make sure the build system knows the file is generated. - set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) - - list(APPEND _cuda_wrap_generated_files ${generated_file}) - - # Add the other files that we want cmake to clean on a cleanup ########## - list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}") - list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES) - set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") - - endif() - endforeach() - - # Set the return parameter - set(${generated_files} ${_cuda_wrap_generated_files}) -endmacro() - -function(_cuda_get_important_host_flags important_flags flag_string) - if(CMAKE_GENERATOR MATCHES "Visual Studio") - string(REGEX MATCHALL "/M[DT][d]?" 
flags "${flag_string}") - list(APPEND ${important_flags} ${flags}) - else() - string(REGEX MATCHALL "-fPIC" flags "${flag_string}") - list(APPEND ${important_flags} ${flags}) - endif() - set(${important_flags} ${${important_flags}} PARENT_SCOPE) -endfunction() - -############################################################################### -############################################################################### -# Separable Compilation Link -############################################################################### -############################################################################### - -# Compute the filename to be used by CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS -function(CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME output_file_var cuda_target object_files) - if (object_files) - set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}) - set(output_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${CMAKE_CFG_INTDIR}/${cuda_target}_intermediate_link${generated_extension}") - else() - set(output_file) - endif() - - set(${output_file_var} "${output_file}" PARENT_SCOPE) -endfunction() - -# Setup the build rule for the separable compilation intermediate link file. -function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options object_files) - if (object_files) - - set_source_files_properties("${output_file}" - PROPERTIES - EXTERNAL_OBJECT TRUE # This is an object file not to be compiled, but only - # be linked. - GENERATED TRUE # This file is generated during the build - ) - - # For now we are ignoring all the configuration specific flags. - set(nvcc_flags) - CUDA_PARSE_NVCC_OPTIONS(nvcc_flags ${options}) - if(CUDA_64_BIT_DEVICE_CODE) - list(APPEND nvcc_flags -m64) - else() - list(APPEND nvcc_flags -m32) - endif() - # If -ccbin, --compiler-bindir has been specified, don't do anything. Otherwise add it here. 
- list( FIND nvcc_flags "-ccbin" ccbin_found0 ) - list( FIND nvcc_flags "--compiler-bindir" ccbin_found1 ) - if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) - # Match VERBATIM check below. - if(CUDA_HOST_COMPILER MATCHES "\\$\\(VCInstallDir\\)") - list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"") - else() - list(APPEND nvcc_flags -ccbin "${CUDA_HOST_COMPILER}") - endif() - endif() - - # Create a list of flags specified by CUDA_NVCC_FLAGS_${CONFIG} and CMAKE_${CUDA_C_OR_CXX}_FLAGS* - set(config_specific_flags) - set(flags) - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - # Add config specific flags - foreach(f ${CUDA_NVCC_FLAGS_${config_upper}}) - list(APPEND config_specific_flags $<$:${f}>) - endforeach() - set(important_host_flags) - _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - foreach(f ${important_host_flags}) - list(APPEND flags $<$:-Xcompiler> $<$:${f}>) - endforeach() - endforeach() - # Add CMAKE_${CUDA_C_OR_CXX}_FLAGS - set(important_host_flags) - _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS}") - foreach(f ${important_host_flags}) - list(APPEND flags -Xcompiler ${f}) - endforeach() - - # Add our general CUDA_NVCC_FLAGS with the configuration specifig flags - set(nvcc_flags ${CUDA_NVCC_FLAGS} ${config_specific_flags} ${nvcc_flags}) - - file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}") - - # Some generators don't handle the multiple levels of custom command - # dependencies correctly (obj1 depends on file1, obj2 depends on obj1), so - # we work around that issue by compiling the intermediate link object as a - # pre-link custom command in that situation. - set(do_obj_build_rule TRUE) - if (MSVC_VERSION GREATER 1599 AND MSVC_VERSION LESS 1800) - # VS 2010 and 2012 have this problem. 
- set(do_obj_build_rule FALSE) - endif() - - set(_verbatim VERBATIM) - if(nvcc_flags MATCHES "\\$\\(VCInstallDir\\)") - set(_verbatim "") - endif() - - if (do_obj_build_rule) - add_custom_command( - OUTPUT ${output_file} - DEPENDS ${object_files} - COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file} - ${flags} - COMMENT "Building NVCC intermediate link file ${output_file_relative_path}" - ${_verbatim} - ) - else() - get_filename_component(output_file_dir "${output_file}" DIRECTORY) - add_custom_command( - TARGET ${cuda_target} - PRE_LINK - COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${output_file_relative_path}" - COMMAND ${CMAKE_COMMAND} -E make_directory "${output_file_dir}" - COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${flags} -dlink ${object_files} -o "${output_file}" - ${_verbatim} - ) - endif() - endif() -endfunction() - -############################################################################### -############################################################################### -# ADD LIBRARY -############################################################################### -############################################################################### -macro(CUDA_ADD_LIBRARY cuda_target) - - CUDA_ADD_CUDA_INCLUDE_ONCE() - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} - ${_cmake_options} ${_cuda_shared_flag} - OPTIONS ${_options} ) - - # Compute the file name of the intermedate link file used for separable - # compilation. - CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - # Add the library. 
- add_library(${cuda_target} ${_cmake_options} - ${_generated_files} - ${_sources} - ${link_file} - ) - - # Add a link phase for the separable compilation if it has been enabled. If - # it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS - # variable will have been defined. - CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - target_link_libraries(${cuda_target} PUBLIC - ${CUDA_LIBRARIES} - ) - - if(CUDA_SEPARABLE_COMPILATION) - target_link_libraries(${cuda_target} - ${CUDA_cudadevrt_LIBRARY} - ) - endif() - - # We need to set the linker language based on what the expected generated file - # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. - set_target_properties(${cuda_target} - PROPERTIES - LINKER_LANGUAGE ${CUDA_C_OR_CXX} - ) - -endmacro() - - -############################################################################### -############################################################################### -# ADD EXECUTABLE -############################################################################### -############################################################################### -macro(CUDA_ADD_EXECUTABLE cuda_target) - - CUDA_ADD_CUDA_INCLUDE_ONCE() - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} ) - - # Compute the file name of the intermedate link file used for separable - # compilation. - CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - # Add the library. - add_executable(${cuda_target} ${_cmake_options} - ${_generated_files} - ${_sources} - ${link_file} - ) - - # Add a link phase for the separable compilation if it has been enabled. 
If - # it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS - # variable will have been defined. - CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - target_link_libraries(${cuda_target} PUBLIC ${CUDA_LIBRARIES}) - - # We need to set the linker language based on what the expected generated file - # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. - set_target_properties(${cuda_target} - PROPERTIES - LINKER_LANGUAGE ${CUDA_C_OR_CXX} - ) - -endmacro() - - -############################################################################### -############################################################################### -# (Internal) helper for manually added cuda source files with specific targets -############################################################################### -############################################################################### -macro(cuda_compile_base cuda_target format generated_files) - # Update a counter in this directory, to keep phony target names unique. - set(_cuda_target "${cuda_target}") - get_property(_counter DIRECTORY PROPERTY _cuda_internal_phony_counter) - if(_counter) - math(EXPR _counter "${_counter} + 1") - else() - set(_counter 1) - endif() - set(_cuda_target "${_cuda_target}_${_counter}") - set_property(DIRECTORY PROPERTY _cuda_internal_phony_counter ${_counter}) - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - - # Create custom commands and targets for each file. 
- CUDA_WRAP_SRCS( ${_cuda_target} ${format} _generated_files ${_sources} - ${_cmake_options} OPTIONS ${_options} PHONY) - - set( ${generated_files} ${_generated_files}) - -endmacro() - -############################################################################### -############################################################################### -# CUDA COMPILE -############################################################################### -############################################################################### -macro(CUDA_COMPILE generated_files) - cuda_compile_base(cuda_compile OBJ ${generated_files} ${ARGN}) -endmacro() - -############################################################################### -############################################################################### -# CUDA COMPILE PTX -############################################################################### -############################################################################### -macro(CUDA_COMPILE_PTX generated_files) - cuda_compile_base(cuda_compile_ptx PTX ${generated_files} ${ARGN}) -endmacro() - -############################################################################### -############################################################################### -# CUDA COMPILE FATBIN -############################################################################### -############################################################################### -macro(CUDA_COMPILE_FATBIN generated_files) - cuda_compile_base(cuda_compile_fatbin FATBIN ${generated_files} ${ARGN}) -endmacro() - -############################################################################### -############################################################################### -# CUDA COMPILE CUBIN -############################################################################### -############################################################################### -macro(CUDA_COMPILE_CUBIN generated_files) - 
cuda_compile_base(cuda_compile_cubin CUBIN ${generated_files} ${ARGN}) -endmacro() - - -############################################################################### -############################################################################### -# CUDA ADD CUFFT TO TARGET -############################################################################### -############################################################################### -macro(CUDA_ADD_CUFFT_TO_TARGET target) - if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY}) - else() - target_link_libraries(${target} ${CUDA_cufft_LIBRARY}) - endif() -endmacro() - -############################################################################### -############################################################################### -# CUDA ADD CUBLAS TO TARGET -############################################################################### -############################################################################### -macro(CUDA_ADD_CUBLAS_TO_TARGET target) - if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY}) - else() - target_link_libraries(${target} ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY}) - endif() -endmacro() - -############################################################################### -############################################################################### -# CUDA BUILD CLEAN TARGET -############################################################################### -############################################################################### -macro(CUDA_BUILD_CLEAN_TARGET) - # Call this after you add all your CUDA targets, and you will get a convience - # target. You should also make clean after running this target to get the - # build system to generate all the code again. 
- - set(cuda_clean_target_name clean_cuda_depends) - if (CMAKE_GENERATOR MATCHES "Visual Studio") - string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name) - endif() - add_custom_target(${cuda_clean_target_name} - COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES}) - - # Clear out the variable, so the next time we configure it will be empty. - # This is useful so that the files won't persist in the list after targets - # have been removed. - set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") -endmacro() diff --git a/cmake/thirdparty/FindCUDA/make2cmake.cmake b/cmake/thirdparty/FindCUDA/make2cmake.cmake deleted file mode 100644 index 7b5389ec51..0000000000 --- a/cmake/thirdparty/FindCUDA/make2cmake.cmake +++ /dev/null @@ -1,106 +0,0 @@ -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. - -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# - -####################################################################### -# This converts a file written in makefile syntax into one that can be included -# by CMake. - -# Input variables -# -# verbose:BOOL=<> OFF: Be as quiet as possible (default) -# ON : Extra output -# -# input_file:FILEPATH=<> Path to dependecy file in makefile format -# -# output_file:FILEPATH=<> Path to file with dependencies in CMake readable variable -# - -file(READ ${input_file} depend_text) - -if (NOT "${depend_text}" STREQUAL "") - - # message("FOUND DEPENDS") - - string(REPLACE "\\ " " " depend_text ${depend_text}) - - # This works for the nvcc -M generated dependency files. - string(REGEX REPLACE "^.* : " "" depend_text ${depend_text}) - string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text}) - - set(dependency_list "") - - foreach(file ${depend_text}) - - string(REGEX REPLACE "^ +" "" file ${file}) - - # OK, now if we had a UNC path, nvcc has a tendency to only output the first '/' - # instead of '//'. Here we will test to see if the file exists, if it doesn't then - # try to prepend another '/' to the path and test again. If it still fails remove the - # path. - - if(NOT EXISTS "${file}") - if (EXISTS "/${file}") - set(file "/${file}") - else() - if(verbose) - message(WARNING " Removing non-existent dependency file: ${file}") - endif() - set(file "") - endif() - endif() - - # Make sure we check to see if we have a file, before asking if it is not a directory. 
- # if(NOT IS_DIRECTORY "") will return TRUE. - if(file AND NOT IS_DIRECTORY "${file}") - # If softlinks start to matter, we should change this to REALPATH. For now we need - # to flatten paths, because nvcc can generate stuff like /bin/../include instead of - # just /include. - get_filename_component(file_absolute "${file}" ABSOLUTE) - list(APPEND dependency_list "${file_absolute}") - endif() - - endforeach() - -else() - # message("FOUND NO DEPENDS") -endif() - -# Remove the duplicate entries and sort them. -list(REMOVE_DUPLICATES dependency_list) -list(SORT dependency_list) - -foreach(file ${dependency_list}) - string(APPEND cuda_nvcc_depend " \"${file}\"\n") -endforeach() - -file(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n") diff --git a/cmake/thirdparty/FindCUDA/parse_cubin.cmake b/cmake/thirdparty/FindCUDA/parse_cubin.cmake deleted file mode 100644 index 626c8a2e47..0000000000 --- a/cmake/thirdparty/FindCUDA/parse_cubin.cmake +++ /dev/null @@ -1,111 +0,0 @@ -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. 
- -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# - -####################################################################### -# Parses a .cubin file produced by nvcc and reports statistics about the file. - - -file(READ ${input_file} file_text) - -if (NOT "${file_text}" STREQUAL "") - - string(REPLACE ";" "\\;" file_text ${file_text}) - string(REPLACE "\ncode" ";code" file_text ${file_text}) - - list(LENGTH file_text len) - - foreach(line ${file_text}) - - # Only look at "code { }" blocks. - if(line MATCHES "^code") - - # Break into individual lines. - string(REGEX REPLACE "\n" ";" line ${line}) - - foreach(entry ${line}) - - # Extract kernel names. - if (${entry} MATCHES "[^g]name = ([^ ]+)") - set(entry "${CMAKE_MATCH_1}") - - # Check to see if the kernel name starts with "_" - set(skip FALSE) - # if (${entry} MATCHES "^_") - # Skip the rest of this block. 
- # message("Skipping ${entry}") - # set(skip TRUE) - # else () - message("Kernel: ${entry}") - # endif () - - endif() - - # Skip the rest of the block if necessary - if(NOT skip) - - # Registers - if (${entry} MATCHES "reg([ ]+)=([ ]+)([^ ]+)") - set(entry "${CMAKE_MATCH_3}") - message("Registers: ${entry}") - endif() - - # Local memory - if (${entry} MATCHES "lmem([ ]+)=([ ]+)([^ ]+)") - set(entry "${CMAKE_MATCH_3}") - message("Local: ${entry}") - endif() - - # Shared memory - if (${entry} MATCHES "smem([ ]+)=([ ]+)([^ ]+)") - set(entry "${CMAKE_MATCH_3}") - message("Shared: ${entry}") - endif() - - if (${entry} MATCHES "^}") - message("") - endif() - - endif() - - - endforeach() - - endif() - - endforeach() - -else() - # message("FOUND NO DEPENDS") -endif() - - diff --git a/cmake/thirdparty/FindCUDA/run_nvcc.cmake b/cmake/thirdparty/FindCUDA/run_nvcc.cmake deleted file mode 100644 index ba7f92e3c0..0000000000 --- a/cmake/thirdparty/FindCUDA/run_nvcc.cmake +++ /dev/null @@ -1,307 +0,0 @@ -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. - -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - - -########################################################################## -# This file runs the nvcc commands to produce the desired output file along with -# the dependency file needed by CMake to compute dependencies. In addition the -# file checks the output of each command and if the command fails it deletes the -# output files. - -# Input variables -# -# verbose:BOOL=<> OFF: Be as quiet as possible (default) -# ON : Describe each step -# -# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or -# RelWithDebInfo, but it should match one of the -# entries in CUDA_HOST_FLAGS. This is the build -# configuration used when compiling the code. If -# blank or unspecified Debug is assumed as this is -# what CMake does. -# -# generated_file:STRING=<> File to generate. This argument must be passed in. -# -# generated_cubin_file:STRING=<> File to generate. This argument must be passed -# in if build_cubin is true. 
- -if(NOT generated_file) - message(FATAL_ERROR "You must specify generated_file on the command line") -endif() - -# Set these up as variables to make reading the generated file easier -set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path -set(source_file "@source_file@") # path -set(NVCC_generated_dependency_file "@NVCC_generated_dependency_file@") # path -set(cmake_dependency_file "@cmake_dependency_file@") # path -set(CUDA_make2cmake "@CUDA_make2cmake@") # path -set(CUDA_parse_cubin "@CUDA_parse_cubin@") # path -set(build_cubin @build_cubin@) # bool -set(CUDA_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path -# We won't actually use these variables for now, but we need to set this, in -# order to force this file to be run again if it changes. -set(generated_file_path "@generated_file_path@") # path -set(generated_file_internal "@generated_file@") # path -set(generated_cubin_file_internal "@generated_cubin_file@") # path - -set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@") # path -set(CUDA_NVCC_FLAGS @CUDA_NVCC_FLAGS@ ;; @CUDA_WRAP_OPTION_NVCC_FLAGS@) # list -@CUDA_NVCC_FLAGS_CONFIG@ -set(nvcc_flags @nvcc_flags@) # list -set(CUDA_NVCC_INCLUDE_DIRS "@CUDA_NVCC_INCLUDE_DIRS@") # list (needs to be in quotes to handle spaces properly). -set(CUDA_NVCC_COMPILE_DEFINITIONS "@CUDA_NVCC_COMPILE_DEFINITIONS@") # list (needs to be in quotes to handle spaces properly). -set(format_flag "@format_flag@") # string -set(cuda_language_flag @cuda_language_flag@) # list - -# Clean up list of include directories and add -I flags -list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) -set(CUDA_NVCC_INCLUDE_ARGS) -foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) - # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 
- list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") -endforeach() - -# Clean up list of compile definitions, add -D flags, and append to nvcc_flags -list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) -foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) - list(APPEND nvcc_flags "-D${def}") -endforeach() - -if(build_cubin AND NOT generated_cubin_file) - message(FATAL_ERROR "You must specify generated_cubin_file on the command line") -endif() - -# This is the list of host compilation flags. It C or CXX should already have -# been chosen by FindCUDA.cmake. -@CUDA_HOST_FLAGS@ - -# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler -set(nvcc_host_compiler_flags "") -# If we weren't given a build_configuration, use Debug. -if(NOT build_configuration) - set(build_configuration Debug) -endif() -string(TOUPPER "${build_configuration}" build_configuration) -#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") -foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) - # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 
- string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") -endforeach() -if (nvcc_host_compiler_flags) - set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) -endif() -#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") -# Add the build specific configuration flags -list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) - -# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority -list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) -list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) -if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) - if (CUDA_HOST_COMPILER STREQUAL "$(VCInstallDir)bin" AND DEFINED CCBIN) - set(CCBIN -ccbin "${CCBIN}") - else() - set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") - endif() -endif() - -# cuda_execute_process - Executes a command with optional command echo and status message. -# -# status - Status message to print if verbose is true -# command - COMMAND argument from the usual execute_process argument structure -# ARGN - Remaining arguments are the command with arguments -# -# CUDA_result - return value from running the command -# -# Make this a macro instead of a function, so that things like RESULT_VARIABLE -# and other return variables are present after executing the process. -macro(cuda_execute_process status command) - set(_command ${command}) - if(NOT "x${_command}" STREQUAL "xCOMMAND") - message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") - endif() - if(verbose) - execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) - # Now we need to build up our command string. We are accounting for quotes - # and spaces, anything else is left up to the user to fix if they want to - # copy and paste a runnable command line. - set(cuda_execute_process_string) - foreach(arg ${ARGN}) - # If there are quotes, excape them, so they come through. 
- string(REPLACE "\"" "\\\"" arg ${arg}) - # Args with spaces need quotes around them to get them to be parsed as a single argument. - if(arg MATCHES " ") - list(APPEND cuda_execute_process_string "\"${arg}\"") - else() - list(APPEND cuda_execute_process_string ${arg}) - endif() - endforeach() - # Echo the command - execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) - endif() - # Run the command - execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) -endmacro() - -# Delete the target file -cuda_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - -# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag -# for dependency generation and hope for the best. -set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") -set(CUDA_VERSION @CUDA_VERSION@) -if(CUDA_VERSION VERSION_LESS "3.0") - cmake_policy(PUSH) - # CMake policy 0007 NEW states that empty list elements are not - # ignored. I'm just setting it to avoid the warning that's printed. - cmake_policy(SET CMP0007 NEW) - # Note that this will remove all occurances of -G. - list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") - cmake_policy(POP) -endif() - -# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This -# can cause incorrect dependencies when #including files based on this macro which is -# defined in the generating passes of nvcc invokation. We will go ahead and manually -# define this for now until a future version fixes this bug. 
-set(CUDACC_DEFINE -D__CUDACC__) - -# Generate the dependency file -cuda_execute_process( - "Generating dependency file: ${NVCC_generated_dependency_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - -M - ${CUDACC_DEFINE} - "${source_file}" - ${cuda_language_flag} - -o "${NVCC_generated_dependency_file}" - ${CCBIN} - ${nvcc_flags} - ${nvcc_host_compiler_flags} - ${depends_CUDA_NVCC_FLAGS} - -DNVCC - ${CUDA_NVCC_INCLUDE_ARGS} - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the cmake readable dependency file to a temp file. Don't put the -# quotes just around the filenames for the input_file and output_file variables. -# CMake will pass the quotes through and not be able to find the file. -cuda_execute_process( - "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" - -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" - -D "verbose=${verbose}" - -P "${CUDA_make2cmake}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Copy the file if it is different -cuda_execute_process( - "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Delete the temporary file -cuda_execute_process( - "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the code -cuda_execute_process( - "Generating ${generated_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${cuda_language_flag} - ${format_flag} -o 
"${generated_file}" - ${CCBIN} - ${nvcc_flags} - ${nvcc_host_compiler_flags} - ${CUDA_NVCC_FLAGS} - -DNVCC - ${CUDA_NVCC_INCLUDE_ARGS} - ) - -if(CUDA_result) - # Since nvcc can sometimes leave half done files make sure that we delete the output file. - cuda_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - message(FATAL_ERROR "Error generating file ${generated_file}") -else() - if(verbose) - message("Generated ${generated_file} successfully.") - endif() -endif() - -# Cubin resource report commands. -if( build_cubin ) - # Run with -cubin to produce resource usage report. - cuda_execute_process( - "Generating ${generated_cubin_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${CUDA_NVCC_FLAGS} - ${nvcc_flags} - ${CCBIN} - ${nvcc_host_compiler_flags} - -DNVCC - -cubin - -o "${generated_cubin_file}" - ${CUDA_NVCC_INCLUDE_ARGS} - ) - - # Execute the parser script. - cuda_execute_process( - "Executing the parser script" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:STRING=${generated_cubin_file}" - -P "${CUDA_parse_cubin}" - ) - -endif() diff --git a/cmake/thirdparty/FindCUDA/select_compute_arch.cmake b/cmake/thirdparty/FindCUDA/select_compute_arch.cmake deleted file mode 100644 index a96a8cac9b..0000000000 --- a/cmake/thirdparty/FindCUDA/select_compute_arch.cmake +++ /dev/null @@ -1,197 +0,0 @@ -# Synopsis: -# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures]) -# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures -# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...) -# - "Auto" detects local machine GPU compute arch at runtime. -# - "Common" and "All" cover common and entire subsets of architectures -# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX -# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal -# NUM: Any number. 
Only those pairs are currently accepted by NVCC though: -# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 -# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable} -# Additionally, sets ${out_variable}_readable to the resulting numeric list -# Example: -# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell) -# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) -# -# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA -# - -# This list will be used for CUDA_ARCH_NAME = All option -set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell") - -# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default) -set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0") - -if (CUDA_VERSION VERSION_GREATER "6.5") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2") -endif () - -if (CUDA_VERSION VERSION_GREATER "7.5") - list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal") - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX") -else() - list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX") -endif () - - - -################################################################################################ -# A function for automatic detection of GPUs installed (if autodetection is enabled) -# Usage: -# CUDA_DETECT_INSTALLED_GPUS(OUT_VARIABLE) -# -function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE) - if(NOT CUDA_GPU_DETECT_OUTPUT) - set(file ${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cpp) - - file(WRITE ${file} "" - "#include \n" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - 
"}\n") - - try_run(run_result compile_result ${PROJECT_BINARY_DIR} ${file} - CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}" - LINK_LIBRARIES ${CUDA_LIBRARIES} - RUN_OUTPUT_VARIABLE compute_capabilities) - - if(run_result EQUAL 0) - string(REPLACE "2.1" "2.1(2.0)" compute_capabilities "${compute_capabilities}") - set(CUDA_GPU_DETECT_OUTPUT ${compute_capabilities} - CACHE INTERNAL "Returned GPU architectures from detect_gpus tool" FORCE) - endif() - endif() - - if(NOT CUDA_GPU_DETECT_OUTPUT) - message(STATUS "Automatic GPU detection failed. Building for common architectures.") - set(${OUT_VARIABLE} ${CUDA_COMMON_GPU_ARCHITECTURES} PARENT_SCOPE) - else() - set(${OUT_VARIABLE} ${CUDA_GPU_DETECT_OUTPUT} PARENT_SCOPE) - endif() -endfunction() - - -################################################################################################ -# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list -# Usage: -# SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs]) -function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable) - set(CUDA_ARCH_LIST "${ARGN}") - - if("X${CUDA_ARCH_LIST}" STREQUAL "X" ) - set(CUDA_ARCH_LIST "Auto") - endif() - - set(cuda_arch_bin) - set(cuda_arch_ptx) - - if("${CUDA_ARCH_LIST}" STREQUAL "All") - set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES}) - elseif("${CUDA_ARCH_LIST}" STREQUAL "Common") - set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES}) - elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto") - CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST) - message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}") - endif() - - # Now process the list and look for names - string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}") - list(REMOVE_DUPLICATES CUDA_ARCH_LIST) - foreach(arch_name ${CUDA_ARCH_LIST}) - set(arch_bin) - set(add_ptx FALSE) - # Check to see if we are compiling PTX - if(arch_name MATCHES "(.*)\\+PTX$") - set(add_ptx TRUE) - set(arch_name ${CMAKE_MATCH_1}) - endif() - 
if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$") - set(arch_bin ${CMAKE_MATCH_1}) - set(arch_ptx ${arch_bin}) - else() - # Look for it in our list of known architectures - if(${arch_name} STREQUAL "Fermi") - set(arch_bin 2.0 "2.1(2.0)") - elseif(${arch_name} STREQUAL "Kepler+Tegra") - set(arch_bin 3.2) - elseif(${arch_name} STREQUAL "Kepler+Tesla") - set(arch_bin 3.7) - elseif(${arch_name} STREQUAL "Kepler") - set(arch_bin 3.0 3.5) - set(arch_ptx 3.5) - elseif(${arch_name} STREQUAL "Maxwell+Tegra") - set(arch_bin 5.3) - elseif(${arch_name} STREQUAL "Maxwell") - set(arch_bin 5.0 5.2) - set(arch_ptx 5.2) - elseif(${arch_name} STREQUAL "Pascal") - set(arch_bin 6.0 6.1) - set(arch_ptx 6.1) - else() - message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS") - endif() - endif() - if(NOT arch_bin) - message(SEND_ERROR "arch_bin wasn't set for some reason") - endif() - list(APPEND cuda_arch_bin ${arch_bin}) - if(add_ptx) - if (NOT arch_ptx) - set(arch_ptx ${arch_bin}) - endif() - list(APPEND cuda_arch_ptx ${arch_ptx}) - endif() - endforeach() - - # remove dots and convert to lists - string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") - string(REGEX REPLACE "\\." 
"" cuda_arch_ptx "${cuda_arch_ptx}") - string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") - string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") - - if(cuda_arch_bin) - list(REMOVE_DUPLICATES cuda_arch_bin) - endif() - if(cuda_arch_ptx) - list(REMOVE_DUPLICATES cuda_arch_ptx) - endif() - - set(nvcc_flags "") - set(nvcc_archs_readable "") - - # Tell NVCC to add binaries for the specified GPUs - foreach(arch ${cuda_arch_bin}) - if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") - # User explicitly specified ARCH for the concrete CODE - list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) - list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) - else() - # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE - list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) - list(APPEND nvcc_archs_readable sm_${arch}) - endif() - endforeach() - - # Tell NVCC to add PTX intermediate code for the specified architectures - foreach(arch ${cuda_arch_ptx}) - list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch}) - list(APPEND nvcc_archs_readable compute_${arch}) - endforeach() - - string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") - set(${out_variable} ${nvcc_flags} PARENT_SCOPE) - set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) -endfunction() diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 605092201a..2d92016360 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -1,3 +1,45 @@ +############################################################################### +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +# +# Produced at the Lawrence Livermore National Laboratory +# +# LLNL-CODE-689114 +# +# All rights reserved. +# +# This file is part of RAJA. +# +# For additional details, please also read RAJA/LICENSE. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the disclaimer below. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the disclaimer (as noted below) in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of the LLNS/LLNL nor the names of its contributors may +# be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, +# LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +############################################################################### + add_custom_target(docs) if (SPHINX_FOUND) @@ -9,6 +51,6 @@ if (DOXYGEN_FOUND) endif() if ( NOT SPHINX_FOUND AND NOT DOXGEN_FOUND) - message(WARNING "RAJA_ENABLE_DOCUMENTATION=On, but Sphinx or Doxygen not found. \ + message(WARNING "ENABLE_DOCUMENTATION=On, but Sphinx or Doxygen not found. 
\ Documentation won't be built.") endif () diff --git a/docs/Licenses/libc++ License b/docs/Licenses/libc++ License new file mode 100644 index 0000000000..c278f2c928 --- /dev/null +++ b/docs/Licenses/libc++ License @@ -0,0 +1,76 @@ +============================================================================== +libc++ License +============================================================================== + +The libc++ library is dual licensed under both the University of Illinois +"BSD-Like" license and the MIT license. As a user of this code you may choose +to use it under either license. As a contributor, you agree to allow your code +to be used under both. + +Full text of the relevant licenses is included below. + +============================================================================== + +University of Illinois/NCSA +Open Source License + +Copyright (c) 2009-2017 by the contributors listed in CREDITS.TXT + +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== + +Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index e6d5921a59..cb055c9afa 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = u'0.3' +version = u'0.4' # The full version, including alpha/beta/rc tags. -release = u'0.3.1' +release = u'0.4.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/sphinx/config_build.rst b/docs/sphinx/config_build.rst index b60c3daebf..592caf5526 100644 --- a/docs/sphinx/config_build.rst +++ b/docs/sphinx/config_build.rst @@ -80,7 +80,7 @@ so all options propagate through the build process consistently. These variables are turned on and off similar to standard CMake variables; e.g., to enable RAJA OpenMP functionality, add this CMake option :: - -DRAJA_ENABLE_OPENMP=On + -DENABLE_OPENMP=On The following list describes the RAJA CMake variables and their defaults. @@ -92,7 +92,7 @@ The following list describes the RAJA CMake variables and their defaults. ====================== ====================== Variable Default ====================== ====================== - RAJA_ENABLE_TESTS On + ENABLE_TESTS On ====================== ====================== * **Programming Models** @@ -103,8 +103,8 @@ The following list describes the RAJA CMake variables and their defaults. ====================== ====================== Variable Default ====================== ====================== - RAJA_ENABLE_OPENMP On - RAJA_ENABLE_CUDA Off + ENABLE_OPENMP On + ENABLE_CUDA Off ====================== ====================== * **Data Types, Sizes, Alignment Parameters, etc.** @@ -251,7 +251,7 @@ The following list describes the RAJA CMake variables and their defaults. 
============================= ======================================== Variable Meaning ============================= ======================================== - RAJA_ENABLE_NESTED Enable/disable nested loop functionality + ENABLE_NESTED Enable/disable nested loop functionality ============================= ======================================== RAJA has an experimental loop-level fault tolerance model which is @@ -260,7 +260,7 @@ The following list describes the RAJA CMake variables and their defaults. ============================= ======================================== Variable Meaning ============================= ======================================== - RAJA_ENABLE_FT Enable/disable fault-tolerance mechanism + ENABLE_FT Enable/disable fault-tolerance mechanism RAJA_REPORT_FT Enable/disable a report of fault- tolerance enabled run (e.g., number of faults detected, recovered from, diff --git a/docs/sphinx/raja_license.rst b/docs/sphinx/raja_license.rst index db271a6bee..b4e7e80404 100644 --- a/docs/sphinx/raja_license.rst +++ b/docs/sphinx/raja_license.rst @@ -13,7 +13,7 @@ RAJA License =================================== -RAJA version 0.3.1 +RAJA version 0.4.0 Copyright (c) 2016, Lawrence Livermore National Security, LLC. diff --git a/docs/style_guide.md b/docs/style_guide.md index 212d7363ec..cbb0553c35 100644 --- a/docs/style_guide.md +++ b/docs/style_guide.md @@ -1,3 +1,27 @@ +# CAMP + +## Type classes + +### Expressions + +An expression is a template of the form: + +```c++ +template +struct expr_s { +}; +// OR +template +using expr = typename expr_s::type; +``` + +Generically it is an un-expanded template type that accepts one or more template +typename parameters. 
+ +### Values + +Any complete type is a value + # Concepts ### namespaces: diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ad015af6eb..bf278953ec 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,6 @@ ############################################################################### # -# Copyright (c) 2016, Lawrence Livermore National Security, LLC. +# Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. # # Produced at the Lawrence Livermore National Laboratory # @@ -10,38 +10,34 @@ # # This file is part of RAJA. # -# For additional details, please also read RAJA/LICENSE. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the disclaimer below. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the disclaimer (as noted below) in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the name of the LLNS/LLNL nor the names of its contributors may -# be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -# LLC, THE U.S. 
DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# For details about use and distribution, please read RAJA/LICENSE. # ############################################################################### raja_add_executable( - NAME example-raja-pi - SOURCES pi.cpp) + NAME example-pi + SOURCES example-pi.cpp) + +raja_add_executable( + NAME example-add-vectors + SOURCES example-add-vectors.cpp) + +raja_add_executable( + NAME example-matrix-multiply + SOURCES example-matrix-multiply.cpp) + +raja_add_executable( + NAME example-jacobi + SOURCES example-jacobi.cpp) + +raja_add_executable( + NAME example-wave + SOURCES example-wave.cpp) + +raja_add_executable( + NAME example-custom-index + SOURCES example-custom-index.cpp) +raja_add_executable( + NAME example-gauss-seidel + SOURCES example-gauss-seidel.cpp) diff --git a/examples/example-add-vectors.cpp b/examples/example-add-vectors.cpp new file mode 100644 index 0000000000..7dd73ba849 --- /dev/null +++ b/examples/example-add-vectors.cpp @@ -0,0 +1,171 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. 
+// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include + +#include "memoryManager.hpp" + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" + +/* + Example 1: Adding Two Vectors + + ----[Details]--------------------- + Starting with a C++ style for loop, this example illustrates + how to construct RAJA versions of the same loop with different + execution policies. + + In this example, three integer arrays (A,B,C) are allocated + using the templated memory manager found in this folder. + The vectors A and B are initialized to have opposite values + and thus when the entries are added the result should be zero. + The result of the vector addition is stored in C. The function + checkSolution is used to verify correctness. + + -----[RAJA Concepts]--------------- + 1. Introduction of the forall loop and basic RAJA policies + + RAJA::forall(iter_space I, [=] (index_type i)) { + + //body + + }); + + [=] By-copy capture + [&] By-reference capture (for non-unified memory targets) + exec_policy - Specifies how the traversal occurs + iter_space - Iteration space for RAJA loop (any random access container is + expected) + index_type - Index for RAJA loops + + ----[Kernel Variants and RAJA Features]------------ + a. C++ style for loop + b. RAJA style for loop with sequential iterations + i. Introduces the seq_exec policy + ii. Introduces RAJA::RangeSegment + c. RAJA style for loop with omp parallelism + i. Introduces the omp_parallel_for_exec policy + d. RAJA style for loop with CUDA parallelism + i. 
Introduces the cuda_exec policy + */ + +/* + CUDA_BLOCK_SIZE - specifies the number of threads in a CUDA thread block +*/ +#if defined(RAJA_ENABLE_CUDA) +const int CUDA_BLOCK_SIZE = 256; +#endif + +/* + Function to verify correctness +*/ +void checkSolution(int *C, int in_N); + +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) +{ + + printf("Example 1: Adding Two Vectors \n \n"); + + const int N = 1000; + int *A = memoryManager::allocate(N); + int *B = memoryManager::allocate(N); + int *C = memoryManager::allocate(N); + + for (int i = 0; i < N; ++i) { + A[i] = -i; + B[i] = i; + } + + printf("Standard C++ Loop \n"); + for (int i = 0; i < N; ++i) { + C[i] = A[i] + B[i]; + } + checkSolution(C, N); + + + printf("RAJA: Sequential Policy \n"); + /* + RAJA::seq_exec - Executes the loop sequentially + + RAJA::RangeSegment(start,stop) - Generates a contiguous sequence of numbers + by the [start, stop) interval specified + */ + RAJA::forall( + RAJA::RangeSegment(0, N), [=](RAJA::Index_type i) { + + C[i] = A[i] + B[i]; + + }); + checkSolution(C, N); + + +#if defined(RAJA_ENABLE_OPENMP) + printf("RAJA: OpenMP Policy \n"); + /* + RAJA::omp_parallel_for_exec - executes the forall loop using the + #pragma omp parallel for directive + */ + RAJA::forall( + RAJA::RangeSegment(0, N), [=](RAJA::Index_type i) { + + C[i] = A[i] + B[i]; + + }); + checkSolution(C, N); +#endif + + +#if defined(RAJA_ENABLE_CUDA) + printf("RAJA: CUDA Policy \n"); + /* + RAJA::cuda_exec - excecutes loop using the CUDA API + Here the __device__ keyword is used to specify a CUDA kernel + */ + RAJA::forall> + (RAJA::RangeSegment(0, N), [=] __device__(RAJA::Index_type i) { + + C[i] = A[i] + B[i]; + + }); + checkSolution(C, N); +#endif + + memoryManager::deallocate(A); + memoryManager::deallocate(B); + memoryManager::deallocate(C); + + return 0; +} + +/* + Function to check for correctness +*/ +void checkSolution(int *C, int in_N) +{ + + RAJA::forall + (RAJA::RangeSegment(0, in_N), 
[=](RAJA::Index_type i) { + + if (std::abs(C[i]) != 0) { + printf("Error in Result \n \n"); + return; + } + + }); + + printf("Correct Result \n \n"); +} diff --git a/examples/example-custom-index.cpp b/examples/example-custom-index.cpp new file mode 100644 index 0000000000..4e2ca06b3b --- /dev/null +++ b/examples/example-custom-index.cpp @@ -0,0 +1,143 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include +#include +#include + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" +#include "RAJA/index/RangeSegment.hpp" + +const int DIM = 2; + +/* + Example 5: Custom Index Set + + ----[Details]------------------- + This example illustrates how to construct a custom + iteration space composed of segments. Here a segment + is an arbitrary collection of indices. + + Assuming a grid with the following contents + + grid = [1, 2, 1, 2, + 3, 4, 3, 4, + 1, 2, 1, 2, + 3, 4, 3, 4]; + + The following code will construct four segments wherein + each segment will store indices corresponding to a particular + value on the grid. For example the first segment will store the + indices {0,2,8,10} corresponding to the location of values equal to 1. + + --------[RAJA Concepts]--------- + 1. Constructing custom IndexSets + 2. RAJA::View - RAJA's wrapper for multidimensional indexing + 3. RAJA::ListSegment - Container for an arbitrary collection of indices + 4. RAJA::TypedListSegment - Container for an arbitrary collection of typed + indices + 5. 
RAJA::StaticIndexSet - Container for an index set which is a collection + of + ListSegments +*/ +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) +{ + + printf("Example 5. Custom Index Set \n"); + int n = 4; + int *A = new int[n * n]; + + auto init = {1, 2, 1, 2, 3, 4, 3, 4, 1, 2, 1, 2, 3, 4, 3, 4}; + + std::copy(init.begin(), init.end(), A); + + /* + The template arguments for StaticIndexSet enable the user to indicate + the required storage types of various segments. In this example, + we only need to store TypedListSegment (aka ListSegment) + */ + RAJA::StaticIndexSet> colorset; + + /* + RAJA::View - RAJA's wrapper for multidimensional indexing + */ + RAJA::View> Aview(A, n, n); + + /* + Buffer used for intermediate indices storage + */ + auto *idx = new RAJA::Index_type[(n + 1) * (n + 1) / 4]; + + /* + Iterate over each dimension (DIM=2 for this example) + */ + for (int xdim : {0, 1}) { + for (int ydim : {0, 1}) { + + RAJA::Index_type count = 0; + + /* + Iterate over each extent in each dimension, incrementing by two to + safely advance over neighbors + */ + for (int xiter = xdim; xiter < n; xiter += 2) { + for (int yiter = ydim; yiter < n; yiter += 2) { + + /* + Add the computed index to the buffer + */ + idx[count] = std::distance(std::addressof(Aview(0, 0)), + std::addressof(Aview(xiter, yiter))); + ++count; + } + } + + /* + RAJA::ListSegment - creates a list segment from a given array with a + specific length. + + Here the indices are inserted from the buffer as a new ListSegment. + */ + colorset.push_back(RAJA::ListSegment(idx, count)); + } + } + + delete[] idx; + + +/* + -----[RAJA Loop Traversal]------- + Under the custom color policy, a RAJA forall loop will traverse + through each list segment stored in the colorset sequentially and traverse + each segment in parallel (if enabled).
+ */ +#if defined(RAJA_ENABLE_OPENMP) + using ColorPolicy = + RAJA::ExecPolicy; +#else + using ColorPolicy = RAJA::ExecPolicy; +#endif + + RAJA::forall( + colorset, [=](int idx) { + + printf("A[%d] = %d\n", idx, A[idx]); + + }); + + return 0; +} diff --git a/examples/example-gauss-seidel.cpp b/examples/example-gauss-seidel.cpp new file mode 100644 index 0000000000..2c0e343fad --- /dev/null +++ b/examples/example-gauss-seidel.cpp @@ -0,0 +1,256 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include +#include + +#include +#include + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" + +#include "memoryManager.hpp" + +/* + Example 6: Gauss-Seidel with Red-Black Ordering + + ----[Details]-------------------- + This example is an extension of Example 3. + In particular we maintain the five point stencil + to discretize the boundary value problem + + U_xx + U_yy = f on [0,1] x [0,1] + + on a structured grid. The right-hand side is + chosen to be f = 2*x*(y-1)*(y-2*x+x*y+2)*exp(x-y). + + Rather than computing values inside the domain with + the Jacobi method, a Gauss-Seidel method with red-black + ordering is now used. + + The scheme is implemented by treating the grid as + a checker board and storing the indices of red and + black cells in RAJA list segments. The segments are + then stored in a RAJA static index set. + + ----[RAJA Concepts]--------------- + 1. Forall loop + 2. RAJA Reduction + 3. RAJA::omp_collapse_nowait_exec + 4. RAJA::ListSegment + 5. 
RAJA::StaticIndexSet +*/ + +/* + Struct to hold grid info + o - Origin in a cartesian dimension + h - Spacing between grid points + n - Number of grid points + */ +struct grid_s { + double o, h; + int n; +}; + +/* + ----[Functions]--------- + solution - Function for the analytic solution + computeErr - Displays the maximum error in the solution + gsColorPolicy - Generates the custom index set for this example +*/ +double solution(double x, double y); +void computeErr(double *I, grid_s grid); +RAJA::StaticIndexSet gsColorPolicy(int N); + +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) +{ + + printf("Example 6: Red-Black Gauss-Seidel \n"); + + /* + ----[Solver Parameters]------------ + tol - Method terminates once the norm is less than tol + N - Number of unknown gridpoints per cartesian dimension + NN - Total number of gridpoints on the grid + maxIter - Maximum number of iterations to be taken + + resI2 - Residual + iteration - Iteration number + grid_s - Struct with grid information for a cartesian dimension + */ + double tol = 1e-10; + + int N = 100; + int NN = (N + 2) * (N + 2); + int maxIter = 100000; + + double resI2; + int iteration; + + grid_s gridx; + gridx.o = 0.0; + gridx.h = 1.0 / (N + 1.0); + gridx.n = N + 2; + + double *I = memoryManager::allocate(NN); + + memset(I, 0, NN * sizeof(double)); + + RAJA::StaticIndexSet colorSet = gsColorPolicy(N); + + memset(I, 0, NN * sizeof(double)); + printf("Gauss-Seidel with Red and Black Ordering \n"); + +#if defined(RAJA_ENABLE_OPENMP) + using colorPolicy = + RAJA::ExecPolicy; +#else + using colorPolicy = RAJA::ExecPolicy; +#endif + + resI2 = 1; + iteration = 0; + while (resI2 > tol * tol) { + +#if defined(RAJA_ENABLE_OPENMP) + RAJA::ReduceSum RAJA_resI2(0.0); +#else + RAJA::ReduceSum RAJA_resI2(0.0); +#endif + + /* + Gauss-Seidel Iteration + */ + RAJA::forall( + colorSet, [=](RAJA::Index_type id) { + + /* + Compute x,y grid index + */ + int m = id % (N + 2); + int n = id / (N + 2); + + double x 
= gridx.o + m * gridx.h; + double y = gridx.o + n * gridx.h; + + double f = gridx.h * gridx.h + * (2 * x * (y - 1) * (y - 2 * x + x * y + 2) * exp(x - y)); + + double newI = -0.25 * (f - I[id - N - 2] - I[id + N + 2] - I[id - 1] + - I[id + 1]); + + double oldI = I[id]; + RAJA_resI2 += (newI - oldI) * (newI - oldI); + I[id] = newI; + + }); + resI2 = RAJA_resI2; + + if (iteration > maxIter) { + printf("Gauss-Seidel Maxed out on iterations \n"); + break; + } + + iteration++; + } + computeErr(I, gridx); + printf("No of iterations: %d \n \n", iteration); + + + memoryManager::deallocate(I); + + return 0; +} + +/* + This function will loop over the red and black cells of a grid + and store the index in a buffer. The buffers will then be used + to generate RAJA ListSegments and populate a RAJA Static Index + Set. +*/ +RAJA::StaticIndexSet gsColorPolicy(int N) +{ + + RAJA::StaticIndexSet colorSet; + + int redN = ceil(N * N / 2); + int blkN = floor(N * N / 2); + RAJA::Index_type *Red = new RAJA::Index_type[redN]; + RAJA::Index_type *Blk = new RAJA::Index_type[blkN]; + + + int ib = 0; + int ir = 0; + + bool isRed = true; + for (int n = 1; n <= N; ++n) { + for (int m = 1; m <= N; ++m) { + + RAJA::Index_type id = n * (N + 2) + m; + if (isRed) { + Red[ib] = id; + ib++; + } else { + Blk[ir] = id; + ir++; + } + isRed = !isRed; + } + } + // Create Index + colorSet.push_back(RAJA::ListSegment(Blk, blkN)); + colorSet.push_back(RAJA::ListSegment(Red, redN)); + delete[] Blk; + delete[] Red; + + return colorSet; +} + + +/* + Function for the anlytic solution +*/ +double solution(double x, double y) +{ + return x * y * exp(x - y) * (1 - x) * (1 - y); +} + +/* + Error is computed via ||I_{approx}(:) - U_{analytic}(:)||_{inf} +*/ +void computeErr(double *I, grid_s grid) +{ + + RAJA::RangeSegment fdBounds(0, grid.n); + RAJA::ReduceMax tMax(-1.0); + using myPolicy = + RAJA::NestedPolicy>; + + RAJA::forallN( + fdBounds, fdBounds, [=](RAJA::Index_type ty, RAJA::Index_type tx) { + + int id = tx 
+ grid.n * ty; + double x = grid.o + tx * grid.h; + double y = grid.o + ty * grid.h; + double myErr = std::abs(I[id] - solution(x, y)); + tMax.max(myErr); + + }); + + double l2err = tMax; + printf("Max error = %lg, h = %f \n", l2err, grid.h); +} diff --git a/examples/example-jacobi.cpp b/examples/example-jacobi.cpp new file mode 100644 index 0000000000..8e9fc888fe --- /dev/null +++ b/examples/example-jacobi.cpp @@ -0,0 +1,413 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include +#include + +#include +#include + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" + +#include "memoryManager.hpp" + +/* + Example 3: Jacobi Method + + ----[Details]-------------------- + This code uses a five point finite difference stencil + to discretize the following boundary value problem + + U_xx + U_yy = f on [0,1] x [0,1]. + + The right-hand side is chosen to be + f = 2*x*(y-1)*(y-2*x+x*y+2)*exp(x-y). + + A structured grid is used to discretize the domain + [0,1] x [0,1]. Values inside the domain are computed + using the Jacobi method to solve the associated + linear system. The scheme is invoked until the l_2 + difference of subsequent iterations is below a + tolerance. + + The scheme is implemented by allocating two arrays + (I, Iold) and initialized to zero. The first set of + nested for loops apply an iteration of the Jacobi + scheme. As boundary values are already known the + scheme is only applied to the interior nodes. 
+ + The second set of nested for loops is used to + update Iold and compute the l_2 norm of the + difference of the iterates. + + Computing the l_2 norm requires a reduction operation. + To simplify the reduction procedure, the RAJA API + introduces thread safe variables. + + ----[RAJA Concepts]--------------- + 1. ForallN loop + 2. RAJA Reduction + 3. RAJA::omp_collapse_nowait_exec + + ----[Kernel Variants and RAJA Features]--- + a. C++ style nested for loops + b. RAJA style nested for loops with sequential iterations + i. Introduces RAJA reducers for sequential policies + c. RAJA style nested for loops with omp parallelism + i. Introduces collapsing loops using RAJA omp policies + ii. Introduces RAJA reducers for omp policies + d. RAJA style for loop with CUDA parallelism + i. Introduces RAJA reducers for cuda policies +*/ + + +/* + ----[Constant Values]----- + CUDA_BLOCK_DIM_X - Number of threads in the + x-dimension of a cuda thread block + + CUDA_BLOCK_DIM_Y - Number of threads in the + y-dimension of a cuda thread block + + CUDA_BLOCK_SIZE - Number of threads per thread block +*/ +#if defined(RAJA_ENABLE_CUDA) +const int CUDA_BLOCK_DIM_X = 16; +const int CUDA_BLOCK_DIM_Y = 16; +const int CUDA_BLOCK_SIZE = 256; +#endif + + +/* + Struct to hold grid info + o - Origin in a cartesian dimension + h - Spacing between grid points + n - Number of grid points + */ +struct grid_s { + double o, h; + int n; +}; + +/* + ----[Functions]--------- + solution - Function for the analytic solution + computeErr - Displays the maximum error in the solution +*/ +double solution(double x, double y); +void computeErr(double *I, grid_s grid); + +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) +{ + + printf("Example 3: Jacobi Method \n"); + + /* + ----[Solver Parameters]------------ + tol - Method terminates once the norm is less than tol + N - Number of unknown gridpoints per cartesian dimension + NN - Total number of gridpoints on the grid + maxIter - Maximum
number of iterations to be taken + + resI2 - Residual + iteration - Iteration number + grid_s - Struct with grid information for a cartesian dimension + */ + double tol = 1e-10; + + int N = 50; + int NN = (N + 2) * (N + 2); + int maxIter = 100000; + + double resI2; + int iteration; + + grid_s gridx; + gridx.o = 0.0; + gridx.h = 1.0 / (N + 1.0); + gridx.n = N + 2; + + /* + I, Iold - Holds iterates of Jacobi method + */ + double *I = memoryManager::allocate(NN); + double *Iold = memoryManager::allocate(NN); + + + memset(I, 0, NN * sizeof(double)); + memset(Iold, 0, NN * sizeof(double)); + + + printf("Standard C++ Loop \n"); + resI2 = 1; + iteration = 0; + + while (resI2 > tol * tol) { + + /* + Jacobi Iteration + */ + for (int n = 1; n <= N; ++n) { + for (int m = 1; m <= N; ++m) { + + double x = gridx.o + m * gridx.h; + double y = gridx.o + n * gridx.h; + + double f = gridx.h * gridx.h + * (2 * x * (y - 1) * (y - 2 * x + x * y + 2) * exp(x - y)); + + int id = n * (N + 2) + m; + I[id] = -0.25 * (f - Iold[id - N - 2] - Iold[id + N + 2] - Iold[id - 1] + - Iold[id + 1]); + } + } + + /* + Compute residual and update Iold + */ + resI2 = 0.0; + for (int k = 0; k < NN; k++) { + resI2 += (I[k] - Iold[k]) * (I[k] - Iold[k]); + Iold[k] = I[k]; + } + + if (iteration > maxIter) { + printf("Standard C++ Loop - Maxed out on iterations \n"); + exit(-1); + } + + iteration++; + } + computeErr(I, gridx); + printf("No of iterations: %d \n \n", iteration); + + + /* + RAJA loop calls may be shortened by predefining policies + */ + RAJA::RangeSegment gridRange(0, NN); + RAJA::RangeSegment jacobiRange(1, (N + 1)); + using jacobiSeqNestedPolicy = + RAJA::NestedPolicy>; + + printf("RAJA: Sequential Policy - Nested ForallN \n"); + resI2 = 1; + iteration = 0; + memset(I, 0, NN * sizeof(double)); + memset(Iold, 0, NN * sizeof(double)); + + while (resI2 > tol * tol) { + + /* + Jacobi Iteration + */ + RAJA::forallN( + jacobiRange, jacobiRange, [=](RAJA::Index_type m, RAJA::Index_type n) { + + 
double x = gridx.o + m * gridx.h; + double y = gridx.o + n * gridx.h; + + double f = gridx.h * gridx.h + * (2 * x * (y - 1) * (y - 2 * x + x * y + 2) * exp(x - y)); + + int id = n * (N + 2) + m; + I[id] = + -0.25 * (f - Iold[id - N - 2] - Iold[id + N + 2] - Iold[id - 1] + - Iold[id + 1]); + }); + + /* + ----[Reduction step]--------- + The RAJA API introduces a thread-safe accumulation variable + "ReduceSum" in order to perform reductions + */ + RAJA::ReduceSum RAJA_resI2(0.0); + RAJA::forall( + gridRange, [=](RAJA::Index_type k) { + + RAJA_resI2 += (I[k] - Iold[k]) * (I[k] - Iold[k]); + Iold[k] = I[k]; + + }); + + resI2 = RAJA_resI2; + if (iteration > maxIter) { + printf("RAJA: Sequential - Maxed out on iterations! \n"); + exit(-1); + } + iteration++; + } + computeErr(I, gridx); + printf("No of iterations: %d \n \n", iteration); + + +#if defined(RAJA_ENABLE_OPENMP) + printf("RAJA: OpenMP Policy - Nested ForallN \n"); + resI2 = 1; + iteration = 0; + memset(I, 0, NN * sizeof(double)); + memset(Iold, 0, NN * sizeof(double)); + + /* + ----[RAJA Policies]----------- + RAJA::omp_collapse_nowait_exec - + parallizes nested loops without introducing nested parallism + + RAJA::OMP_Parallel<> - Creates a parallel region, + must be the last argument of the nested policy list + */ + using jacobiompNestedPolicy = + RAJA::NestedPolicy, RAJA::OMP_Parallel<>>; + + while (resI2 > tol * tol) { + + /* + Jacobi Iteration + */ + RAJA::forallN( + jacobiRange, jacobiRange, [=](RAJA::Index_type m, RAJA::Index_type n) { + + double x = gridx.o + m * gridx.h; + double y = gridx.o + n * gridx.h; + + double f = gridx.h * gridx.h + * (2 * x * (y - 1) * (y - 2 * x + x * y + 2) * exp(x - y)); + + int id = n * (N + 2) + m; + I[id] = -0.25 * (f - Iold[id - N - 2] - Iold[id + N + 2] - Iold[id - 1] + - Iold[id + 1]); + }); + /* + Compute residual and update Iold + */ + RAJA::ReduceSum RAJA_resI2(0.0); + RAJA::forall( + gridRange, [=](RAJA::Index_type k) { + + RAJA_resI2 += (I[k] - Iold[k]) * (I[k] - 
Iold[k]); + Iold[k] = I[k]; + + }); + + resI2 = RAJA_resI2; + if (iteration > maxIter) { + printf("RAJA: OpenMP - Maxed out on iterations! \n"); + exit(-1); + } + iteration++; + } + computeErr(I, gridx); + printf("No of iterations: %d \n \n", iteration); +#endif + + +#if defined(RAJA_ENABLE_CUDA) + printf("RAJA: CUDA Policy - Nested ForallN \n"); + + using jacobiCUDANestedPolicy = RAJA::NestedPolicy, + RAJA::cuda_threadblock_x_exec>>; + + resI2 = 1; + iteration = 0; + memset(I, 0, NN * sizeof(double)); + memset(Iold, 0, NN * sizeof(double)); + + while (resI2 > tol * tol) { + + /* + Jacobi Iteration + */ + RAJA::forallN( + jacobiRange, jacobiRange, [=] __device__(RAJA::Index_type m, RAJA::Index_type n) { + + double x = gridx.o + m * gridx.h; + double y = gridx.o + n * gridx.h; + + double f = gridx.h * gridx.h + * (2 * x * (y - 1) * (y - 2 * x + x * y + 2) * exp(x - y)); + + int id = n * (N + 2) + m; + I[id] = -0.25 * (f - Iold[id - N - 2] - Iold[id + N + 2] - Iold[id - 1] + - Iold[id + 1]); + }); + + /* + Compute residual and update Iold + */ + RAJA::ReduceSum, double> RAJA_resI2(0.0); + RAJA::forall>( + gridRange, [=] __device__(RAJA::Index_type k) { + + RAJA_resI2 += (I[k] - Iold[k]) * (I[k] - Iold[k]); + Iold[k] = I[k]; + + }); + + resI2 = RAJA_resI2; + + if (iteration > maxIter) { + printf("RAJA: CUDA - Maxed out on iterations! 
\n"); + exit(-1); + } + iteration++; + } + cudaDeviceSynchronize(); + computeErr(I, gridx); + printf("No of iterations: %d \n \n", iteration); +#endif + + memoryManager::deallocate(I); + memoryManager::deallocate(Iold); + + + return 0; +} + +/* + Function for the anlytic solution +*/ +double solution(double x, double y) +{ + return x * y * exp(x - y) * (1 - x) * (1 - y); +} + +/* + Error is computed via ||I_{approx}(:) - U_{analytic}(:)||_{inf} +*/ +void computeErr(double *I, grid_s grid) +{ + + RAJA::RangeSegment fdBounds(0, grid.n); + RAJA::ReduceMax tMax(-1.0); + using myPolicy = + RAJA::NestedPolicy>; + + RAJA::forallN( + fdBounds, fdBounds, [=](RAJA::Index_type ty, RAJA::Index_type tx) { + + int id = tx + grid.n * ty; + double x = grid.o + tx * grid.h; + double y = grid.o + ty * grid.h; + double myErr = std::abs(I[id] - solution(x, y)); + tMax.max(myErr); + }); + + double l2err = tMax; + printf("Max error = %lg, h = %f \n", l2err, grid.h); +} diff --git a/examples/example-matrix-multiply.cpp b/examples/example-matrix-multiply.cpp new file mode 100644 index 0000000000..974caef4bf --- /dev/null +++ b/examples/example-matrix-multiply.cpp @@ -0,0 +1,318 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. 
+// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include +#include +#include +#include + +#include "RAJA/RAJA.hpp" +#include "RAJA/index/RangeSegment.hpp" +#include "RAJA/util/defines.hpp" + +#include "memoryManager.hpp" + +/* + Example 2: Multiplying Two Matrices + + ----[Details]-------------------- + Starting with C++ style nested for loops, this example + illustrates how to construct RAJA versions of the same loops + using different execution policies. Furthermore, as nesting + RAJA forall loops is not currently supported with CUDA, + this example makes use of RAJA's forallN loop which + may be used with any policy. + + In this example two matrices of dimension N x N are allocated and multiplied. + The matrix A is populated with a constant value along the rows while B is + populated with a constant value along the columns. The function checkSolution + checks for correctness. + + -----[RAJA Concepts]------------- + 1. Nesting forall loops (Not currently supported in CUDA) + + 2. ForallN loop (Supported with all policies) + + RAJA::forallN< + RAJA::NestedPolicy >( + iter_space I1,..., iter_space IN, [=](index_type i1,..., index_type iN) { + + //body + + }); + + [=] By-copy capture + [&] By-reference capture (for non-unified memory targets) + RAJA::NestedPolicy - Stores a list of RAJA execution policies + exec_policy - Specifies how the traversal occurs + iter_space - Iteration space for RAJA loop (any random access + container is expected) + + 3. RAJA::View - RAJA's wrapper for multidimensional indexing + + ----[Kernel Variants and RAJA Features]----- + a. C++ style nested for loops + b. RAJA style outer loop with a sequential policy + and a C++ style inner for loop + c. RAJA style nested for loops with sequential policies + d. RAJA forallN loop with sequential policies + i. This kernel introduces RAJA::ExecList + e. RAJA forallN loop with OpenMP parallelism on the outer loop + f.
RAJA forallN loop executed on the CUDA API + i. This kernel illustrates constructing two-dimensional thread blocks + for use of the CUDA execution policy. + ii. The current implementation of forallN using the CUDA + variant is performed asynchronously and thus a barrier + (cudaDeviceSynchronize) is placed after calling forallN. +*/ + +/* + ---[Constant values]---- + N - Defines the number of rows/columns in a matrix + NN - Total number of entries in a matrix + DIM - Dimension of the data structure in which the matrices + are stored + + CUDA_BLOCK_SIZE_X - Number of threads in the + x-dimension of a cuda thread block + + CUDA_BLOCK_SIZE_Y - Number of threads in the + y-dimension of a cuda thread block +*/ +const int N = 1000; +const int NN = N * N; +const int DIM = 2; + +#if defined(RAJA_ENABLE_CUDA) +const int CUDA_BLOCK_SIZE_X = 16; +const int CUDA_BLOCK_SIZE_Y = 16; +#endif + +/* + Macros are used here to simplify indexing +*/ +#define A(x1, x2) A[x1 + N * x2] +#define B(x1, x2) B[x1 + N * x2] +#define C(x1, x2) C[x1 + N * x2] + +template +void checkSolution(T *C, int N); + +template +void checkSolution(RAJA::View> Cview, int N); + +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) +{ + + printf("Example 2: Multiplying Two N x N Matrices \n \n"); + double *A = memoryManager::allocate(NN); + double *B = memoryManager::allocate(NN); + double *C = memoryManager::allocate(NN); + + /* + Intialize matrices + */ + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + A(row, col) = row; + B(row, col) = col; + } + } + + printf("Standard C++ Nested Loops \n"); + for (int row = 0; row < N; ++row) { + for (int col = 0; col < N; ++col) { + + double dot = 0.0; + for (int k = 0; k < N; ++k) { + dot += A(row, k) * B(k, col); + } + + C(row, col) = dot; + } + } + checkSolution(C, N); + + /* + As an alternative to marcos RAJA::View wraps + a pointer to enable multi-dimensional indexing + In this example our data is assumed to be 
two-dimensional + with N values in each component. + */ + RAJA::View> Aview(A, N, N); + RAJA::View> Bview(B, N, N); + RAJA::View> Cview(C, N, N); + + /* + As the loops use the same bounds, we may specify + the bounds prior to the use of any RAJA loops + */ + RAJA::RangeSegment matBounds(0, N); + + + printf("RAJA: Forall - Sequential Policies\n"); + RAJA::forall( + matBounds, [=](RAJA::Index_type row) { + + for (int col = 0; col < N; ++col) { + + double dot = 0.0; + for (int k = 0; k < N; ++k) { + dot += Aview(row, k) * Bview(k, col); + } + + Cview(row, col) = dot; + } + + }); + checkSolution(Cview, N); + + printf("RAJA: Nested Forall - Sequential Policies\n"); + /* + Forall loops may be nested under sequential and omp policies + */ + RAJA::forall( + matBounds, [=](RAJA::Index_type row) { + + RAJA::forall( + matBounds, [=](RAJA::Index_type col) { + + + double dot = 0.0; + for (int k = 0; k < N; ++k) { + dot += Aview(row, k) * Bview(k, col); + } + + Cview(row, col) = dot; + }); + }); + checkSolution(Cview, N); + + + printf("RAJA: ForallN - Sequential Policies\n"); + /* + Nested forall loops may be collapsed into a single forallN loop + */ + RAJA::forallN>>( + matBounds, matBounds, [=](RAJA::Index_type row, RAJA::Index_type col) { + + double dot = 0.0; + for (int k = 0; k < N; ++k) { + dot += Aview(row, k) * Bview(k, col); + } + + Cview(row, col) = dot; + }); + checkSolution(Cview, N); + + +#if defined(RAJA_ENABLE_OPENMP) + printf("RAJA: ForallN - OpenMP/Sequential Policies\n"); + /* + Here the outer loop is excuted in parallel while the inner loop + is executed sequentially + */ + RAJA::forallN>>( + matBounds, matBounds, [=](RAJA::Index_type row, RAJA::Index_type col) { + + double dot = 0.0; + for (int k = 0; k < N; ++k) { + dot += Aview(row, k) * Bview(k, col); + } + + Cview(row, col) = dot; + }); + checkSolution(Cview, N); +#endif + + +#if defined(RAJA_ENABLE_CUDA) + printf("RAJA: ForallN - CUDA Policies\n"); + /* + This example illustrates creating two-dimensional 
thread blocks as described + under the CUDA nomenclature + */ + RAJA::forallN, + RAJA::cuda_threadblock_x_exec>>>( + matBounds, matBounds, [=] __device__(RAJA::Index_type row, RAJA::Index_type col) { + + double dot = 0.0; + for (int k = 0; k < N; ++k) { + dot += Aview(row, k) * Bview(k, col); + } + + Cview(row, col) = dot; + }); + cudaDeviceSynchronize(); + checkSolution(Cview, N); +#endif + + memoryManager::deallocate(A); + memoryManager::deallocate(B); + memoryManager::deallocate(C); + + return 0; +} + +/* + Function which checks for correctness +*/ +template +void checkSolution(RAJA::View> Cview, int in_N) +{ + + RAJA::forall( + RAJA::RangeSegment(0, N), [=](RAJA::Index_type row) { + + RAJA::forall( + RAJA::RangeSegment(0, N), [=](RAJA::Index_type col) { + + double diff = Cview(row, col) - row * col * in_N; + + if (std::abs(diff) > 1e-9) { + printf("Incorrect Result \n \n"); + return; + } + + }); + }); + printf("Correct Result \n \n"); +}; + +template +void checkSolution(T *C, int in_N) +{ + + RAJA::forall( + RAJA::RangeSegment(0, N), [=](RAJA::Index_type row) { + + RAJA::forall( + RAJA::RangeSegment(0, N), [=](RAJA::Index_type col) { + + double diff = C(row, col) - row * col * in_N; + + if (std::abs(diff) > 1e-9) { + printf("Incorrect Result \n \n"); + return; + } + + }); + }); + printf("Correct Result \n \n"); +}; diff --git a/examples/example-pi.cpp b/examples/example-pi.cpp new file mode 100644 index 0000000000..c3b414eaca --- /dev/null +++ b/examples/example-pi.cpp @@ -0,0 +1,42 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. 
+// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" + +int main(int RAJA_UNUSED_ARG(argc), char** RAJA_UNUSED_ARG(argv[])) +{ + typedef RAJA::seq_reduce reduce_policy; + typedef RAJA::seq_exec execute_policy; + + RAJA::Index_type begin = 0; + RAJA::Index_type numBins = 512 * 512; + + RAJA::ReduceSum piSum(0.0); + + RAJA::forall(begin, + numBins, + [=](int i) { + double x = (double(i) + 0.5) / numBins; + piSum += 4.0 / (1.0 + x * x); + }); + + std::cout << "PI is ~ " << double(piSum) / numBins << std::endl; + + return 0; +} diff --git a/examples/example-wave.cpp b/examples/example-wave.cpp new file mode 100644 index 0000000000..cd9ff125c7 --- /dev/null +++ b/examples/example-wave.cpp @@ -0,0 +1,288 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include +#include +#include +#include "memoryManager.hpp" + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" + +/* + Example 4: Time-Domain Finite Difference + Acoustic Wave Equation Solver + + ------[Details]---------------------- + This example highlights how to construct a single + kernel capable of being executed with different RAJA policies. + + Here we solve the acoustic wave equation + + P_tt = cc*(P_xx + P_yy) via finite differences. + + The scheme uses a second order central difference discretization + for time and a fourth order central difference discretization for space. + Periodic boundary conditions are assumed on the grid [-1,1] x [-1, 1]. 
+ + NOTE: The x and y dimensions are discretized identically. + + ----[RAJA Concepts]------------------- + 1. RAJA kernels are portable and a single implementation can run + on various platforms + + 2. RAJA MaxReduction - RAJA's implementation for computing a maximum value + (MinReduction computes the min) +*/ + +/* + ---[Constant Values]------- + sr - Radius of the finite difference stencil + PI - Value of pi + + CUDA_BLOCK_DIM_X - Number of threads in the + x-dimension of a cuda thread block + CUDA_BLOCK_DIM_Y - Number of threads in the + y-dimension of a cuda thread block +*/ + +const int sr = 2; +const double PI = 3.14159265359; + +#if defined(RAJA_ENABLE_CUDA) +const int CUDA_BLOCK_DIM_X = 16; +const int CUDA_BLOCK_DIM_Y = 16; +#endif + +/* + ----[Struct to hold grid info]----- + ox - Origin in a cartesian dimension + dx - Spacing between grid points + nx - Number of grid points + */ +struct grid_s { + double ox, dx; + int nx; +}; + + +/* + ----[Functions]------ + wave - Templated wave propagator + waveSol - Function for the analytic solution of the equation + setIC - Sets the initial value at two time levels (t0,t1) + computeErr - Displays the maximum error in the approximation + */ + +template +void wave(T *P1, T *P2, RAJA::RangeSegment fdBounds, double ct, int nx); +double waveSol(double t, double x, double y); +void setIC(double *P1, double *P2, double t0, double t1, grid_s grid); +void computeErr(double *P, double tf, grid_s grid); + +int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[])) +{ + + printf("Example 4. Time-Domain Finite Difference Acoustic Wave Equation Solver \n"); + + /* + Wave speed squared + */ + double cc = 1. / 2.0; + + /* + Multiplier for spatial refinement + */ + int factor = 8; + + /* + Discretization of the domain.
+ The same discretization of the x-dimension will be used for the y-dimension + */ + grid_s grid; + grid.ox = -1; + grid.dx = 0.1250 / factor; + grid.nx = 16 * factor; + RAJA::RangeSegment fdBounds(0, grid.nx); + + /* + Solution is propagated until time T + */ + double T = 0.82; + + + int entries = grid.nx * grid.nx; + double *P1 = memoryManager::allocate(entries); + double *P2 = memoryManager::allocate(entries); + + /* + ----[Time stepping parameters]---- + dt - Step size + nt - Total number of time steps + ct - Merged coefficients + */ + double dt, nt, time, ct; + dt = 0.01 * (grid.dx / sqrt(cc)); + nt = ceil(T / dt); + dt = T / nt; + ct = (cc * dt * dt) / (grid.dx * grid.dx); + + /* + Predefined Nested Policies + */ + + // Sequential + using fdPolicy = + RAJA::NestedPolicy>; + + // OpenMP + // using fdPolicy = + // RAJA::NestedPolicy, + // RAJA::OMP_Parallel<>>; + + // CUDA + // using fdPolicy + //= RAJA::NestedPolicy, + // RAJA::cuda_threadblock_x_exec>>; + + time = 0; + setIC(P1, P2, (time - dt), time, grid); + for (int k = 0; k < nt; ++k) { + + wave(P1, P2, fdBounds, ct, grid.nx); + time += dt; + + double *Temp = P2; + P2 = P1; + P1 = Temp; + } +#if defined(RAJA_ENABLE_CUDA) + cudaDeviceSynchronize(); +#endif + computeErr(P2, time, grid); + printf("Evolved solution to time = %f \n", time); + + memoryManager::deallocate(P1); + memoryManager::deallocate(P2); + + return 0; +} + + +/* + Function for the analytic solution +*/ +double waveSol(double t, double x, double y) +{ + return cos(2. * PI * t) * sin(2. * PI * x) * sin(2.
* PI * y); +} + +/* + Error is computed via ||P_{approx}(:) - P_{analytic}(:)||_{inf} +*/ +void computeErr(double *P, double tf, grid_s grid) +{ + + RAJA::RangeSegment fdBounds(0, grid.nx); + RAJA::ReduceMax tMax(-1.0); + using myPolicy = + RAJA::NestedPolicy>; + + RAJA::forallN( + fdBounds, fdBounds, [=](RAJA::Index_type ty, RAJA::Index_type tx) { + + int id = tx + grid.nx * ty; + double x = grid.ox + tx * grid.dx; + double y = grid.ox + ty * grid.dx; + double myErr = std::abs(P[id] - waveSol(tf, x, y)); + + /* + tMax.max() is used to store the maximum value + */ + tMax.max(myErr); + }); + + double lInfErr = tMax; + printf("Max Error = %lg, dx = %f \n", lInfErr, grid.dx); +} + + +/* + Function to set initial condition +*/ +void setIC(double *P1, double *P2, double t0, double t1, grid_s grid) +{ + + using myPolicy = + RAJA::NestedPolicy>; + RAJA::RangeSegment fdBounds(0, grid.nx); + + RAJA::forallN( + fdBounds, fdBounds, [=](RAJA::Index_type ty, RAJA::Index_type tx) { + + int id = tx + ty * grid.nx; + double x = grid.ox + tx * grid.dx; + double y = grid.ox + ty * grid.dx; + + P1[id] = waveSol(t0, x, y); + P2[id] = waveSol(t1, x, y); + }); +} + +/* + Wave Propagator +*/ +template +void wave(T *P1, T *P2, RAJA::RangeSegment fdBounds, double ct, int nx) +{ + + RAJA::forallN( + fdBounds, fdBounds, [=] RAJA_HOST_DEVICE(RAJA::Index_type ty, RAJA::Index_type tx) { + + /* + Coefficients for a fourth order stencil + */ + double coeff[5] = { + -1.0 / 12.0, 4.0 / 3.0, -5.0 / 2.0, 4.0 / 3.0, -1.0 / 12.0}; + + const int id = tx + ty * nx; + double P_old = P1[id]; + double P_curr = P2[id]; + + /* + Computes Laplacian + */ + double lap = 0.0; + + for (auto r : RAJA::RangeSegment(-sr, sr + 1)) { + const int xi = (tx + r + nx) % nx; + const int idx = xi + nx * ty; + lap += coeff[r + sr] * P2[idx]; + + const int yi = (ty + r + nx) % nx; + const int idy = tx + nx * yi; + lap += coeff[r + sr] * P2[idy]; + } + + /* + Writes out result + */ + P1[id] = 2 * P_curr - P_old + ct * lap; + +
}); +} diff --git a/examples/memoryManager.hpp b/examples/memoryManager.hpp new file mode 100644 index 0000000000..d1d59bd8f3 --- /dev/null +++ b/examples/memoryManager.hpp @@ -0,0 +1,58 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. +// +// Produced at the Lawrence Livermore National Laboratory +// +// LLNL-CODE-689114 +// +// All rights reserved. +// +// This file is part of RAJA. +// +// For details about use and distribution, please read RAJA/LICENSE. +// +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef EXAMPLES_MEMORYMANAGER_HPP +#define EXAMPLES_MEMORYMANAGER_HPP + +#include "RAJA/RAJA.hpp" +#include "RAJA/util/defines.hpp" + +/* + As RAJA does not manage memory the user must allocate and deallocate memory. + + This header contains a general purpose memory manager which may be used + to perform c++ style allocation/deallocation or allocate/deallocate + CUDA unified memory. The type of memory allocated is dependent on how + RAJA was configured. 
+*/ +namespace memoryManager{ + + template + T *allocate(RAJA::Index_type size) + { + T *ptr; +#if defined(RAJA_ENABLE_CUDA) + cudaMallocManaged((void **)&ptr, sizeof(T) * size, cudaMemAttachGlobal); +#else + ptr = new T[size]; +#endif + return ptr; + } + + template + void deallocate(T *&ptr) + { + if (ptr) { +#if defined(RAJA_ENABLE_CUDA) + cudaFree(ptr); +#else + delete[] ptr; +#endif + ptr = nullptr; + } + } + +}; +#endif diff --git a/host-configs/bgqos/clang_3_9_0.cmake b/host-configs/bgqos/clang_3_9_0.cmake index bdaa2b69e5..500f833783 100644 --- a/host-configs/bgqos/clang_3_9_0.cmake +++ b/host-configs/bgqos/clang_3_9_0.cmake @@ -20,7 +20,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -ffast-math -std=c++ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 -ffast-math -std=c++11 -stdlib=libc++" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -std=c++11 -stdlib=libc++" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/bgqos/gcc_4_7_2.cmake b/host-configs/bgqos/gcc_4_7_2.cmake index e24bf51eb0..52b38fb105 100644 --- a/host-configs/bgqos/gcc_4_7_2.cmake +++ b/host-configs/bgqos/gcc_4_7_2.cmake @@ -20,7 +20,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -mcpu=a2 -mtune=a set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -Ofast -mcpu=a2 -mtune=a2 -finline-functions -finline-limit=20000 -std=c++11" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -std=c++11" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos/clang.cmake b/host-configs/chaos/clang.cmake deleted file mode 120000 index d23b85548f..0000000000 --- 
a/host-configs/chaos/clang.cmake +++ /dev/null @@ -1 +0,0 @@ -clang_3_8_0.cmake \ No newline at end of file diff --git a/host-configs/chaos/clang.cmake b/host-configs/chaos/clang.cmake new file mode 100755 index 0000000000..f26830b7d1 --- /dev/null +++ b/host-configs/chaos/clang.cmake @@ -0,0 +1,29 @@ +## +## Copyright (c) 2016, Lawrence Livermore National Security, LLC. +## +## Produced at the Lawrence Livermore National Laboratory. +## +## LLNL-CODE-689114 +## +## All rights reserved. +## +## For release details and restrictions, please see RAJA/LICENSE. +## + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_CXX_COMPILER "/usr/global/tools/clang/chaos_5_x86_64_ib/clang-3.8.0/bin/clang++" CACHE PATH "") +set(CMAKE_C_COMPILER "/usr/global/tools/clang/chaos_5_x86_64_ib/clang-3.8.0/bin/clang" CACHE PATH "") + +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") + +set(ENABLE_OPENMP On CACHE BOOL "") + +set(RAJA_RANGE_ALIGN 4 CACHE INT "") +set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") +set(RAJA_DATA_ALIGN 64 CACHE INT "") +set(RAJA_COHERENCE_BLOCK_SIZE 64 CACHE INT "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE Bool "") diff --git a/host-configs/chaos/clang_3_8_0.cmake b/host-configs/chaos/clang_3_8_0.cmake index 3003697f72..f26830b7d1 100755 --- a/host-configs/chaos/clang_3_8_0.cmake +++ b/host-configs/chaos/clang_3_8_0.cmake @@ -19,7 +19,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git 
a/host-configs/chaos/clang_3_9_0.cmake b/host-configs/chaos/clang_3_9_0.cmake index 0bc96506f5..f8e0b96987 100755 --- a/host-configs/chaos/clang_3_9_0.cmake +++ b/host-configs/chaos/clang_3_9_0.cmake @@ -19,7 +19,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos/clang_cuda.cmake b/host-configs/chaos/clang_cuda.cmake index 1565d21e26..694a22a2e9 100755 --- a/host-configs/chaos/clang_cuda.cmake +++ b/host-configs/chaos/clang_cuda.cmake @@ -19,9 +19,9 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") -set(RAJA_ENABLE_CUDA On CACHE BOOL "") -set(RAJA_ENABLE_CLANG_CUDA On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_CUDA On CACHE BOOL "") +set(ENABLE_CLANG_CUDA On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos/gcc.cmake b/host-configs/chaos/gcc.cmake index 8ceb9fbff7..29373faa96 120000 --- a/host-configs/chaos/gcc.cmake +++ b/host-configs/chaos/gcc.cmake @@ -1 +1 @@ -gnu_4_9_3.cmake \ No newline at end of file +gcc_4_9_3.cmake \ No newline at end of file diff --git a/host-configs/chaos/gcc_4_9_3.cmake b/host-configs/chaos/gcc_4_9_3.cmake index 9a3b00ff70..d48380ea37 100755 --- a/host-configs/chaos/gcc_4_9_3.cmake +++ b/host-configs/chaos/gcc_4_9_3.cmake @@ -18,7 +18,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -mavx -finline-fu 
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -Ofast -mavx -finline-functions -finline-limit=20000" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos/icpc.cmake b/host-configs/chaos/icpc.cmake index 0fae613ef7..2ca174a28a 120000 --- a/host-configs/chaos/icpc.cmake +++ b/host-configs/chaos/icpc.cmake @@ -1 +1 @@ -icpc_16_0_109.cmake \ No newline at end of file +icpc_16_0_258.cmake \ No newline at end of file diff --git a/host-configs/chaos/icpc_16_0_258.cmake b/host-configs/chaos/icpc_16_0_258.cmake index 84657a06d4..c525739a8b 100755 --- a/host-configs/chaos/icpc_16_0_258.cmake +++ b/host-configs/chaos/icpc_16_0_258.cmake @@ -20,7 +20,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_FLAGS} -O3 -mar set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COMMON_FLAGS} -O3 -march=native -ansi-alias" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_FLAGS} -O0 -g" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos/icpc_17_0_174.cmake b/host-configs/chaos/icpc_17_0_174.cmake index f71825e78e..2013619035 100755 --- a/host-configs/chaos/icpc_17_0_174.cmake +++ b/host-configs/chaos/icpc_17_0_174.cmake @@ -20,7 +20,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_FLAGS} -O3 -mar set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COMMON_FLAGS} -O3 -march=native -ansi-alias" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_FLAGS} -O0 -g" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") 
set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos/nvcc.cmake b/host-configs/chaos/nvcc.cmake index c4b49cb1d2..a44ec9164a 100755 --- a/host-configs/chaos/nvcc.cmake +++ b/host-configs/chaos/nvcc.cmake @@ -22,14 +22,16 @@ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -Wall -Werror -Wextra" C if(CMAKE_BUILD_TYPE MATCHES Release) set(RAJA_NVCC_FLAGS -O2; -restrict; -arch compute_35; -std c++11; --expt-extended-lambda; -ccbin; ${CMAKE_CXX_COMPILER} CACHE LIST "") elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo) - set(RAJA_NVCC_FLAGS -g; -G; -O2; -restrict; -arch compute_35; -std c++11; --expt-extended-lambda; -ccbin ${CMAKE_CXX_COMPILER} CACHE LIST "") + set(RAJA_NVCC_FLAGS -g; -lineinfo; -O2; -restrict; -arch compute_35; -std c++11; --expt-extended-lambda; -ccbin ${CMAKE_CXX_COMPILER} CACHE LIST "") elseif(CMAKE_BUILD_TYPE MATCHES Debug) set(RAJA_NVCC_FLAGS -g; -G; -O0; -restrict; -arch compute_35; -std c++11; --expt-extended-lambda; -ccbin ${CMAKE_CXX_COMPILER} CACHE LIST "") +else() + set(RAJA_NVCC_FLAGS -restrict; -arch compute_35; -std c++11; --expt-extended-lambda; -ccbin ${CMAKE_CXX_COMPILER} CACHE LIST "") endif() -set(RAJA_ENABLE_CUDA On CACHE BOOL "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_CUDA On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/chaos_5_x86_64_ib b/host-configs/chaos_5_x86_64_ib index f022ceb675..d4cd79969a 120000 --- a/host-configs/chaos_5_x86_64_ib +++ b/host-configs/chaos_5_x86_64_ib @@ -1 +1 @@ -chaos/intel.cmake \ No newline at end of file +chaos/icpc.cmake \ No newline at end of file diff --git a/host-configs/linux/clang.cmake b/host-configs/linux/clang.cmake index 6a9d5a1c7e..6f417e9a67 100755 --- a/host-configs/linux/clang.cmake +++ b/host-configs/linux/clang.cmake @@ -18,7 +18,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3" 
CACHE STRING "") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/linux/gcc.cmake b/host-configs/linux/gcc.cmake index 080aebbcd5..913e4a04fe 100755 --- a/host-configs/linux/gcc.cmake +++ b/host-configs/linux/gcc.cmake @@ -18,7 +18,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -mavx -finline-fu set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -Ofast -mavx -finline-functions -finline-limit=20000" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -fpermissive" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/host-configs/linux/icpc.cmake b/host-configs/linux/icpc.cmake index 1f5af6f998..d1c9646165 100755 --- a/host-configs/linux/icpc.cmake +++ b/host-configs/linux/icpc.cmake @@ -18,7 +18,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -mavx -inline-max-to set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 -mavx -inline-max-total-size=20000 -inline-forceinline -ansi-alias" CACHE STRING "") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0" CACHE STRING "") -set(RAJA_ENABLE_OPENMP On CACHE BOOL "") +set(ENABLE_OPENMP On CACHE BOOL "") set(RAJA_RANGE_ALIGN 4 CACHE INT "") set(RAJA_RANGE_MIN_LENGTH 32 CACHE INT "") diff --git a/include/RAJA/RAJA.hpp b/include/RAJA/RAJA.hpp index 19b9e2ad0f..93a2d22617 100644 --- a/include/RAJA/RAJA.hpp +++ b/include/RAJA/RAJA.hpp @@ -16,11 +16,8 @@ ****************************************************************************** */ -#ifndef RAJA_HPP -#define RAJA_HPP - 
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -30,37 +27,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
+// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_HPP +#define RAJA_HPP + #include "RAJA/config.hpp" #include "RAJA/util/defines.hpp" @@ -69,6 +42,8 @@ #include "RAJA/util/Operators.hpp" +#include "RAJA/util/basic_mempool.hpp" + // // All platforms must support sequential execution. // @@ -99,7 +74,7 @@ #include "RAJA/index/IndexSet.hpp" // -// Strongly typed index class. +// Strongly typed index class // #include "RAJA/index/IndexValue.hpp" @@ -113,7 +88,7 @@ // -// Multidimensional layouts and views. +// Multidimensional layouts and views // #include "RAJA/util/Layout.hpp" #include "RAJA/util/OffsetLayout.hpp" @@ -131,8 +106,13 @@ // Generic iteration templates for perfectly nested loops // #include "RAJA/pattern/forallN.hpp" +#include "RAJA/pattern/nested.hpp" + +// +// Reduction objects +// #include "RAJA/pattern/reduce.hpp" diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 455869cb4f..311e621d06 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -21,11 +21,8 @@ ****************************************************************************** */ -#ifndef RAJA_config_HPP -#define RAJA_config_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -35,39 +32,15 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. 
-// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_config_HPP +#define RAJA_config_HPP + #cmakedefine RAJA_USE_STL -#cmakedefine RAJA_ENABLE_FT +#cmakedefine ENABLE_FT #define @RAJA_FP@ #define @RAJA_PTR@ @@ -96,12 +69,12 @@ /* * Detect the host C++ compiler we are using. 
*/ -#if defined(__clang__) -#define RAJA_COMPILER_CLANG -#elif defined(__INTEL_COMPILER) +#if defined(__INTEL_COMPILER) #define RAJA_COMPILER_INTEL #elif defined(__xlc__) #define RAJA_COMPILER_XLC +#elif defined(__clang__) +#define RAJA_COMPILER_CLANG #elif defined(__PGI) #define RAJA_COMPILER_PGI #elif defined(_WIN32) @@ -193,7 +166,7 @@ const int COHERENCE_BLOCK_SIZE = @RAJA_COHERENCE_BLOCK_SIZE@; #define RAJA_INLINE inline __attribute__((always_inline)) -#if defined(RAJA_ENABLE_CUDA) +#if defined(ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) #else #define RAJA_ALIGN_DATA(d) __assume_aligned(d, DATA_ALIGN) @@ -215,7 +188,7 @@ const int COHERENCE_BLOCK_SIZE = @RAJA_COHERENCE_BLOCK_SIZE@; #define RAJA_INLINE inline __attribute__((always_inline)) -#if defined(RAJA_ENABLE_CUDA) +#if defined(ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) #else #define RAJA_ALIGN_DATA(d) __builtin_assume_aligned(d, DATA_ALIGN) @@ -261,7 +234,7 @@ const int COHERENCE_BLOCK_SIZE = @RAJA_COHERENCE_BLOCK_SIZE@; #define RAJA_INLINE inline __attribute__((always_inline)) -#if defined(RAJA_ENABLE_CUDA) +#if defined(ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) #else #define RAJA_ALIGN_DATA(d) __builtin_assume_aligned(d, DATA_ALIGN) diff --git a/include/RAJA/index/IndexSet.hpp b/include/RAJA/index/IndexSet.hpp index 735aff023a..a68185c29f 100644 --- a/include/RAJA/index/IndexSet.hpp +++ b/include/RAJA/index/IndexSet.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_IndexSet_HPP -#define RAJA_IndexSet_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,37 +19,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. 
// //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_IndexSet_HPP +#define RAJA_IndexSet_HPP + #include "RAJA/config.hpp" #include "RAJA/index/ListSegment.hpp" @@ -72,6 +45,13 @@ namespace RAJA enum PushEnd { PUSH_FRONT, PUSH_BACK }; enum PushCopy { PUSH_COPY, PUSH_NOCOPY }; +template +class StaticIndexSet; + +namespace policy +{ +namespace indexset +{ /// /// Class representing index set execution policy. @@ -88,8 +68,10 @@ struct ExecPolicy typedef SEG_EXEC_POLICY_T seg_exec; }; -template -class StaticIndexSet; +} // end namespace indexset +} // end namespace policy + +using policy::indexset::ExecPolicy; /*! @@ -107,7 +89,6 @@ class StaticIndexSet : public StaticIndexSet const int T0_TypeId = sizeof...(TREST); public: - //! Construct empty index set RAJA_INLINE constexpr StaticIndexSet() : PARENT() {} @@ -220,9 +201,7 @@ class StaticIndexSet : public StaticIndexSet //! Returns the number of types this IndexSet can store. RAJA_INLINE - constexpr size_t getNumTypes() const { - return 1 + PARENT::getNumTypes(); - } + constexpr size_t getNumTypes() const { return 1 + PARENT::getNumTypes(); } /* * IMPORTANT: Some methods to add a segment to an index set @@ -255,20 +234,20 @@ class StaticIndexSet : public StaticIndexSet Index_type num = getNumSegments(); RangeStrideSegment Iter = (pend == PUSH_BACK) - ? RangeStrideSegment(0, num, 1) - : RangeStrideSegment(num - 1, -1, -1); + ? 
RangeStrideSegment(0, num, 1) + : RangeStrideSegment(num - 1, -1, -1); for (Index_type i : Iter) segment_push_into(i, c, pend, pcopy); } - static constexpr int value_for(PushEnd end, PushCopy copy) { + static constexpr int value_for(PushEnd end, PushCopy copy) + { return (end == PUSH_BACK) << 1 | (copy == PUSH_COPY); } public: - template RAJA_INLINE void segment_push_into(size_t segid, StaticIndexSet &c, @@ -281,18 +260,18 @@ class StaticIndexSet : public StaticIndexSet } Index_type offset = getSegmentOffsets()[segid]; switch (value_for(pend, pcopy)) { - case value_for(PUSH_BACK, PUSH_COPY): - c.push_back(*data[offset]); - break; - case value_for(PUSH_BACK, PUSH_NOCOPY): - c.push_back_nocopy(data[offset]); - break; - case value_for(PUSH_FRONT, PUSH_COPY): - c.push_front(*data[offset]); - break; - case value_for(PUSH_FRONT, PUSH_NOCOPY): - c.push_front_nocopy(data[offset]); - break; + case value_for(PUSH_BACK, PUSH_COPY): + c.push_back(*data[offset]); + break; + case value_for(PUSH_BACK, PUSH_NOCOPY): + c.push_back_nocopy(data[offset]); + break; + case value_for(PUSH_FRONT, PUSH_COPY): + c.push_front(*data[offset]); + break; + case value_for(PUSH_FRONT, PUSH_NOCOPY): + c.push_front_nocopy(data[offset]); + break; } } @@ -355,7 +334,9 @@ class StaticIndexSet : public StaticIndexSet RAJA_INLINE void segmentCall(size_t segid, BODY body, ARGS... args) const { if (getSegmentTypes()[segid] != T0_TypeId) { - PARENT::segmentCall(segid, std::forward(body), std::forward(args)...); + PARENT::segmentCall(segid, + std::forward(body), + std::forward(args)...); return; } Index_type offset = getSegmentOffsets()[segid]; @@ -363,7 +344,6 @@ class StaticIndexSet : public StaticIndexSet } protected: - //! Internal logic to add a new segment -- catch invalid type insertion template RAJA_INLINE void push_internal(Tnew *val, @@ -412,38 +392,28 @@ class StaticIndexSet : public StaticIndexSet } //! 
Returns the number of indices (the total icount of segments - RAJA_INLINE Index_type &getTotalLength() { - return PARENT::getTotalLength(); - } + RAJA_INLINE Index_type &getTotalLength() { return PARENT::getTotalLength(); } //! set total length of the indexset - RAJA_INLINE void setTotalLength(int n) { - return PARENT::setTotalLength(n); - } + RAJA_INLINE void setTotalLength(int n) { return PARENT::setTotalLength(n); } //! increase the total stored size of the indexset - RAJA_INLINE void increaseTotalLength(int n) { + RAJA_INLINE void increaseTotalLength(int n) + { return PARENT::increaseTotalLength(n); } public: - using iterator = Iterators::numeric_iterator; //! Get an iterator to the end. - iterator end() const { - return iterator(getNumSegments()); - } + iterator end() const { return iterator(getNumSegments()); } //! Get an iterator to the beginning. - iterator begin() const { - return iterator(0); - } + iterator begin() const { return iterator(0); } //! Return the number of elements in the range. - Index_type size() const { - return getNumSegments(); - } + Index_type size() const { return getNumSegments(); } //! @name IndexSet segment subsetting methods (slices ranges) /// @@ -500,9 +470,11 @@ class StaticIndexSet : public StaticIndexSet { StaticIndexSet *retVal = new StaticIndexSet(); int numSeg = getNumSegments(); - for (auto & seg : segIds) - if (seg >= 0 && seg < numSeg) + for (auto &seg : segIds) { + if (seg >= 0 && seg < numSeg) { segment_push_into(seg, *retVal, PUSH_BACK, PUSH_NOCOPY); + } + } return retVal; } @@ -526,7 +498,6 @@ class StaticIndexSet : public StaticIndexSet } protected: - //! Returns the mapping of segment_index -> segment_type RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() { @@ -564,7 +535,6 @@ class StaticIndexSet : public StaticIndexSet } public: - /// /// Equality operator returns true if all segments are equal; else false. 
/// @@ -575,8 +545,7 @@ class StaticIndexSet : public StaticIndexSet RAJA_INLINE bool operator==(const StaticIndexSet &other) const { size_t num_seg = getNumSegments(); - if (num_seg != other.getNumSegments()) - return false; + if (num_seg != other.getNumSegments()) return false; for (size_t segid = 0; segid < num_seg; ++segid) { if (!compareSegmentById(segid, other)) { @@ -594,7 +563,6 @@ class StaticIndexSet : public StaticIndexSet } private: - //! vector of IndexSet data objects of type T0 RAJA::RAJAVec data; @@ -613,7 +581,6 @@ template <> class StaticIndexSet<> { public: - //! create empty IndexSet RAJA_INLINE StaticIndexSet() : m_len(0) {} @@ -642,9 +609,7 @@ class StaticIndexSet<> } protected: - RAJA_INLINE static size_t getNumTypes() { - return 0; - } + RAJA_INLINE static size_t getNumTypes() { return 0; } template RAJA_INLINE constexpr bool isValidSegmentType(T const &) const @@ -653,22 +618,17 @@ class StaticIndexSet<> return false; } - RAJA_INLINE static int getNumSegments() - { - return 0; - } + RAJA_INLINE static int getNumSegments() { return 0; } - RAJA_INLINE static size_t getLength() - { - return 0; - } + RAJA_INLINE static size_t getLength() { return 0; } template RAJA_INLINE void segmentCall(size_t, BODY, ARGS...) 
const { } - RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() { + RAJA_INLINE RAJA::RAJAVec &getSegmentTypes() + { return segment_types; } @@ -677,7 +637,8 @@ class StaticIndexSet<> return segment_types; } - RAJA_INLINE RAJA::RAJAVec &getSegmentOffsets() { + RAJA_INLINE RAJA::RAJAVec &getSegmentOffsets() + { return segment_offsets; } @@ -686,7 +647,8 @@ class StaticIndexSet<> return segment_offsets; } - RAJA_INLINE RAJA::RAJAVec &getSegmentIcounts() { + RAJA_INLINE RAJA::RAJAVec &getSegmentIcounts() + { return segment_icounts; } @@ -695,17 +657,11 @@ class StaticIndexSet<> return segment_icounts; } - RAJA_INLINE Index_type &getTotalLength() { - return m_len; - } + RAJA_INLINE Index_type &getTotalLength() { return m_len; } - RAJA_INLINE void setTotalLength(int n) { - m_len = n; - } + RAJA_INLINE void setTotalLength(int n) { m_len = n; } - RAJA_INLINE void increaseTotalLength(int n) { - m_len += n; - } + RAJA_INLINE void increaseTotalLength(int n) { m_len += n; } template RAJA_INLINE bool compareSegmentById( @@ -724,13 +680,13 @@ class StaticIndexSet<> template RAJA_INLINE P0 &getSegment(size_t) { - return *((P0*)(this - this)); + return *((P0 *)(this - this)); } template RAJA_INLINE P0 const &getSegment(size_t) const { - return *((P0*)(this - this)); + return *((P0 *)(this - this)); } template @@ -752,31 +708,26 @@ class StaticIndexSet<> } public: - using iterator = Iterators::numeric_iterator; - RAJA_INLINE int getStartingIcount(int segid) { + RAJA_INLINE int getStartingIcount(int segid) + { return segment_icounts[segid]; } - RAJA_INLINE int getStartingIcount(int segid) const { + RAJA_INLINE int getStartingIcount(int segid) const + { return segment_icounts[segid]; } //! Get an iterator to the end. - iterator end() const { - return iterator(getNumSegments()); - } + iterator end() const { return iterator(getNumSegments()); } //! Get an iterator to the beginning. - iterator begin() const { - return iterator(0); - } + iterator begin() const { return iterator(0); } //! 
Return the number of elements in the range. - Index_type size() const { - return getNumSegments(); - } + Index_type size() const { return getNumSegments(); } private: //! Vector of segment types: seg_index -> seg_type diff --git a/include/RAJA/index/IndexSetBuilders.hpp b/include/RAJA/index/IndexSetBuilders.hpp index 841ebcfb4b..5ca8001b7b 100644 --- a/include/RAJA/index/IndexSetBuilders.hpp +++ b/include/RAJA/index/IndexSetBuilders.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_IndexSetBuilders_HPP -#define RAJA_IndexSetBuilders_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,46 +19,21 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_IndexSetBuilders_HPP +#define RAJA_IndexSetBuilders_HPP + #include "RAJA/config.hpp" #include "RAJA/util/types.hpp" +#include "RAJA/index/IndexSet.hpp" namespace RAJA { -class IndexSet; - /*! ****************************************************************************** * @@ -126,12 +98,12 @@ void buildLockFreeBlockIndexset(IndexSet& iset, ****************************************************************************** */ void buildLockFreeColorIndexset(IndexSet& iset, - int const* domainToRange, + Index_type const* domainToRange, int numEntity, int numRangePerDomain, int numEntityRange, - int* elemPermutation = 0l, - int* ielemPermutation = 0l); + Index_type* elemPermutation = 0l, + Index_type* ielemPermutation = 0l); } // closing brace for RAJA namespace diff --git a/include/RAJA/index/IndexSetUtils.hpp b/include/RAJA/index/IndexSetUtils.hpp index df59d4a8f0..828cb88188 100644 --- a/include/RAJA/index/IndexSetUtils.hpp +++ b/include/RAJA/index/IndexSetUtils.hpp @@ -9,11 +9,8 @@ ****************************************************************************** */ -#ifndef RAJA_IndexSetUtils_HPP -#define RAJA_IndexSetUtils_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// 
Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -23,37 +20,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. 
// //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_IndexSetUtils_HPP +#define RAJA_IndexSetUtils_HPP + #include "RAJA/config.hpp" #include "RAJA/pattern/forall.hpp" #include "RAJA/policy/sequential.hpp" diff --git a/include/RAJA/index/IndexValue.hpp b/include/RAJA/index/IndexValue.hpp index 86e86110ad..f7de53cffc 100644 --- a/include/RAJA/index/IndexValue.hpp +++ b/include/RAJA/index/IndexValue.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_INDEXVALUE_HPP -#define RAJA_INDEXVALUE_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,37 +19,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_INDEXVALUE_HPP +#define RAJA_INDEXVALUE_HPP + #include "RAJA/config.hpp" #include "RAJA/util/defines.hpp" diff --git a/include/RAJA/index/ListSegment.hpp b/include/RAJA/index/ListSegment.hpp index 8262c5a23b..1e72027301 100644 --- a/include/RAJA/index/ListSegment.hpp +++ b/include/RAJA/index/ListSegment.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_ListSegment_HPP -#define RAJA_ListSegment_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,37 +19,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. 
-// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_ListSegment_HPP +#define RAJA_ListSegment_HPP + #include "RAJA/config.hpp" #include "RAJA/util/defines.hpp" #include "RAJA/util/types.hpp" @@ -128,12 +101,14 @@ class TypedListSegment m_data = new T[m_size]; } +#ifdef RAJA_ENABLE_CUDA //! copy data from container using BlockCopy template void copy(Container&& src, BlockCopy) { cudaErrchk(cudaMemcpy(m_data, &(*src.begin()), m_size * sizeof(T), cudaMemcpyDefault)); } +#endif //! 
copy data from container using TrivialCopy template @@ -237,10 +212,9 @@ class TypedListSegment /// RAJA_HOST_DEVICE void swap(TypedListSegment& other) { - using std::swap; - swap(m_data, other.m_data); - swap(m_size, other.m_size); - swap(m_owned, other.m_owned); + camp::safe_swap(m_data, other.m_data); + camp::safe_swap(m_size, other.m_size); + camp::safe_swap(m_owned, other.m_owned); } //! accessor to get the end iterator for a TypedListSegment diff --git a/include/RAJA/index/RangeSegment.hpp b/include/RAJA/index/RangeSegment.hpp index 318c917131..0fd220bfe4 100644 --- a/include/RAJA/index/RangeSegment.hpp +++ b/include/RAJA/index/RangeSegment.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_RangeSegment_HPP -#define RAJA_RangeSegment_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,43 +19,21 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_RangeSegment_HPP +#define RAJA_RangeSegment_HPP + #include "RAJA/config.hpp" #include "RAJA/internal/Iterators.hpp" #include "RAJA/util/concepts.hpp" +#include + namespace RAJA { @@ -138,6 +113,15 @@ struct TypedRangeSegment { { } + //! copy assignment + RAJA_HOST_DEVICE TypedRangeSegment& operator=(TypedRangeSegment const& o) + { + m_begin = o.m_begin; + m_end = o.m_end; + m_size = o.m_size; + return *this; + } + //! destructor RAJA_HOST_DEVICE ~TypedRangeSegment() {} @@ -147,10 +131,9 @@ struct TypedRangeSegment { */ RAJA_HOST_DEVICE void swap(TypedRangeSegment& other) { - using std::swap; - swap(m_begin, other.m_begin); - swap(m_end, other.m_end); - swap(m_size, other.m_size); + camp::safe_swap(m_begin, other.m_begin); + camp::safe_swap(m_end, other.m_end); + camp::safe_swap(m_size, other.m_size); } //! 
obtain an iterator to the beginning of this TypedRangeSegment @@ -171,6 +154,19 @@ struct TypedRangeSegment { */ RAJA_HOST_DEVICE StorageT size() const { return m_size; } + //! Create a slice of this instance as a new instance + /*! + * \return A new instance spanning *begin() + begin to *begin() + begin + + * length + */ + RAJA_HOST_DEVICE TypedRangeSegment slice(Index_type begin, + Index_type length) const + { + auto start = m_begin[0] + begin; + auto end = start + length > m_end[0] ? m_end[0] : start + length; + return TypedRangeSegment{start, end}; + } + //! equality comparison /*! * \return true if and only if the begin, end, and size match @@ -278,7 +274,7 @@ struct TypedRangeStrideSegment { m_end(iterator(end, stride)), // essentially a ceil((end-begin)/stride) but using integer math, // and allowing for negative strides - m_size((end - begin + stride - ( stride > 0 ? 1 : -1 ) ) / stride) + m_size((end - begin + stride - (stride > 0 ? 1 : -1)) / stride) { // if m_size was initialized as negative, that indicates a zero iteration // space @@ -311,10 +307,9 @@ struct TypedRangeStrideSegment { */ RAJA_HOST_DEVICE void swap(TypedRangeStrideSegment& other) { - using std::swap; - swap(m_begin, other.m_begin); - swap(m_end, other.m_end); - swap(m_size, other.m_size); + camp::safe_swap(m_begin, other.m_begin); + camp::safe_swap(m_end, other.m_end); + camp::safe_swap(m_size, other.m_size); } //! obtain an iterator to the beginning of this TypedRangeStrideSegment @@ -338,6 +333,19 @@ struct TypedRangeStrideSegment { */ RAJA_HOST_DEVICE StorageT size() const { return m_size; } + //! Create a slice of this instance as a new instance + /*! + * \return A new instance spanning *begin() + begin * stride to *begin() + + * (begin + length) * stride + */ + RAJA_HOST_DEVICE TypedRangeStrideSegment slice(Index_type begin, + Index_type length) const + { + return TypedRangeStrideSegment{*(this->begin() + begin), + *(this->begin() + begin + length), + m_begin.stride}; + } + //! 
equality comparison /*! * \return true if and only if the begin, end, and size match @@ -396,8 +404,8 @@ using common_type_t = typename common_type::type; template > -RAJA_HOST_DEVICE -TypedRangeSegment make_range(BeginT&& begin, EndT&& end) +RAJA_HOST_DEVICE TypedRangeSegment make_range(BeginT&& begin, + EndT&& end) { return {begin, end}; } @@ -416,10 +424,10 @@ template > -RAJA_HOST_DEVICE -TypedRangeStrideSegment make_strided_range(BeginT&& begin, - EndT&& end, - StrideT&& stride) +RAJA_HOST_DEVICE TypedRangeStrideSegment make_strided_range( + BeginT&& begin, + EndT&& end, + StrideT&& stride) { return {begin, end, stride}; } @@ -429,12 +437,12 @@ namespace concepts template struct RangeConstructible - : DefineConcept(val>()) { + : DefineConcept(camp::val>()) { }; template struct RangeStrideConstructible - : DefineConcept(val>()) { + : DefineConcept(camp::val>()) { }; } // closing brace for concepts namespace @@ -457,16 +465,16 @@ namespace std //! specialization of swap for TypedRangeSegment template -RAJA_INLINE void swap(RAJA::TypedRangeSegment& a, - RAJA::TypedRangeSegment& b) +RAJA_HOST_DEVICE RAJA_INLINE void swap(RAJA::TypedRangeSegment& a, + RAJA::TypedRangeSegment& b) { a.swap(b); } //! 
specialization of swap for TypedRangeStrideSegment template -RAJA_INLINE void swap(RAJA::TypedRangeStrideSegment& a, - RAJA::TypedRangeStrideSegment& b) +RAJA_HOST_DEVICE RAJA_INLINE void swap(RAJA::TypedRangeStrideSegment& a, + RAJA::TypedRangeStrideSegment& b) { a.swap(b); } diff --git a/include/RAJA/internal/DepGraphNode.hpp b/include/RAJA/internal/DepGraphNode.hpp index 7398d97a38..bcd04b13d5 100644 --- a/include/RAJA/internal/DepGraphNode.hpp +++ b/include/RAJA/internal/DepGraphNode.hpp @@ -9,11 +9,8 @@ ****************************************************************************** */ -#ifndef RAJA_DepGraphNode_HPP -#define RAJA_DepGraphNode_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -23,37 +20,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_DepGraphNode_HPP +#define RAJA_DepGraphNode_HPP + #include "RAJA/config.hpp" #include "RAJA/util/types.hpp" diff --git a/include/RAJA/internal/ForallNPolicy.hpp b/include/RAJA/internal/ForallNPolicy.hpp index b2d734b4ee..570203fb59 100644 --- a/include/RAJA/internal/ForallNPolicy.hpp +++ b/include/RAJA/internal/ForallNPolicy.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_internal_ForallNPolicy_HPP -#define RAJA_internal_ForallNPolicy_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,37 +19,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. 
-// -// Redistribution ind use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. 
// //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_internal_ForallNPolicy_HPP +#define RAJA_internal_ForallNPolicy_HPP + #include "RAJA/config.hpp" namespace RAJA diff --git a/include/RAJA/internal/IndexArray.hpp b/include/RAJA/internal/IndexArray.hpp deleted file mode 100644 index b6ed89f2f3..0000000000 --- a/include/RAJA/internal/IndexArray.hpp +++ /dev/null @@ -1,226 +0,0 @@ -/*! - ****************************************************************************** - * - * \file - * - * \brief Header file for array indexing helpers. - * - ****************************************************************************** - */ - - -#ifndef RAJA_DETAIL_INDEXARRAY_HPP -#define RAJA_DETAIL_INDEXARRAY_HPP - -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. -// -// Produced at the Lawrence Livermore National Laboratory -// -// LLNL-CODE-689114 -// -// All rights reserved. -// -// This file is part of RAJA. -// -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - - -#include -#include - -namespace RAJA -{ -namespace detail -{ -template -struct index_storage { - RAJA_HOST_DEVICE - RAJA_INLINE - Type& get() { return data; } - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr const Type& get() const { return data; } - Type data; -}; - -template -RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get_data(StorageType& s) - -> decltype(s.get()) -{ - return s.get(); -} -template -RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get_data(const StorageType& s) - -> decltype(s.get()) -{ - return s.get(); -} - -template -struct select_element { - using AType = typename std::remove_reference::type; - using return_type = typename AType::type&; - using const_return_type = const typename AType::type&; - using value_type = typename AType::type; - - RAJA_HOST_DEVICE - RAJA_INLINE - static constexpr return_type get(AType_in& a, size_t offset) - { - return (offset == I) ? 
get_data>(a) - : select_element::get(a, offset); - } - RAJA_HOST_DEVICE - RAJA_INLINE - static constexpr const_return_type get(const AType_in& a, size_t offset) - { - return (offset == I) ? get_data>(a) - : select_element::get(a, offset); - } -}; - -template -struct select_element<0, AType_in> { - using AType = typename std::remove_reference::type; - using return_type = typename AType::type&; - using const_return_type = const typename AType::type&; - using value_type = typename AType::type; - - RAJA_HOST_DEVICE - RAJA_INLINE - static constexpr return_type get(AType_in& a, size_t offset) - { - return get_data>(a); - } - - RAJA_HOST_DEVICE - RAJA_INLINE - static constexpr const_return_type get(const AType_in& a, size_t offset) - { - return get_data>(a); - } -}; - -template -struct index_array_helper; - -template -struct index_array_helper> - : index_storage... { - using type = Type; - using my_type = index_array_helper>; - static constexpr size_t size = sizeof...(orest); - - RAJA_HOST_DEVICE - RAJA_INLINE - // constexpr : c++14 only - Type& operator[](size_t offset) - { - return select_element::get(*this, offset); - } - - RAJA_HOST_DEVICE - RAJA_INLINE - constexpr const Type& operator[](size_t offset) const - { - return select_element::get(*this, offset); - } -}; - -template -constexpr size_t - index_array_helper>::size; -} - -template -struct index_array - : public detail::index_array_helper> { - static_assert(Size > 0, "index_arrays must have at least one element"); - using base = - detail::index_array_helper>; - using base::index_array_helper; - using base::operator[]; -}; - -template -RAJA_HOST_DEVICE RAJA_INLINE Type& get(detail::index_storage& s) -{ - return s.data; -} - -template -RAJA_HOST_DEVICE RAJA_INLINE const Type& get( - const detail::index_storage& s) -{ - return s.data; -} - -namespace detail -{ -template -RAJA_HOST_DEVICE RAJA_INLINE auto make_index_array_helper( - VarOps::index_sequence, - Args... 
args) -> index_array -{ - index_array arr{}; - VarOps::ignore_args((get(arr) = args)...); - return arr; -}; -} - -template -RAJA_HOST_DEVICE RAJA_INLINE auto make_index_array(Arg1 arg1, Args... args) - -> index_array -{ - return detail::make_index_array_helper( - VarOps::make_index_sequence(), arg1, args...); -}; - -template -std::ostream& operator<<(std::ostream& os, index_array const& a) -{ - // const detail::index_array_helper> & - // ah = a; - // os << "array templated iteration: " << ah << std::endl; - // os << "array runtime operator iteration: "; - os << '['; - for (size_t i = 0; i < Size - 1; ++i) - os << a[i] << ", "; - if (Size - 1 > 0) os << a[Size - 1]; - os << ']'; - return os; -} -} - -#endif /* RAJA_DETAIL_INDEXARRAY_HPP */ diff --git a/include/RAJA/internal/Iterators.hpp b/include/RAJA/internal/Iterators.hpp index 807056fa1f..eccc3fbe2c 100644 --- a/include/RAJA/internal/Iterators.hpp +++ b/include/RAJA/internal/Iterators.hpp @@ -9,11 +9,8 @@ */ -#ifndef RAJA_ITERATORS_HPP -#define RAJA_ITERATORS_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -23,37 +20,13 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. 
-// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. 
// //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_ITERATORS_HPP +#define RAJA_ITERATORS_HPP + #include "RAJA/config.hpp" #include "RAJA/util/defines.hpp" #include "RAJA/util/types.hpp" @@ -248,6 +221,7 @@ class strided_numeric_iterator : public base_iterator : base(rhs), stride(stride) { } + RAJA_HOST_DEVICE constexpr strided_numeric_iterator( const strided_numeric_iterator& rhs) : base(rhs.val), stride(rhs.stride) diff --git a/include/RAJA/internal/LegacyCompatibility.hpp b/include/RAJA/internal/LegacyCompatibility.hpp index d36612653d..168be19fe3 100644 --- a/include/RAJA/internal/LegacyCompatibility.hpp +++ b/include/RAJA/internal/LegacyCompatibility.hpp @@ -8,11 +8,8 @@ ****************************************************************************** */ -#ifndef RAJA_LEGACY_COMPATIBILITY_HPP -#define RAJA_LEGACY_COMPATIBILITY_HPP - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016, Lawrence Livermore National Security, LLC. +// Copyright (c) 2016-17, Lawrence Livermore National Security, LLC. // // Produced at the Lawrence Livermore National Laboratory // @@ -22,41 +19,19 @@ // // This file is part of RAJA. // -// For additional details, please also read RAJA/LICENSE. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the disclaimer below. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the -// documentation and/or other materials provided with the distribution. 
-// -// * Neither the name of the LLNS/LLNL nor the names of its contributors may -// be used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -// LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY -// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. +// For details about use and distribution, please read RAJA/LICENSE. 
// //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifndef RAJA_LEGACY_COMPATIBILITY_HPP +#define RAJA_LEGACY_COMPATIBILITY_HPP + #include "RAJA/config.hpp" #include "RAJA/util/defines.hpp" +#include "camp/camp.hpp" + #if (!defined(__INTEL_COMPILER)) && (!defined(RAJA_COMPILER_MSVC)) static_assert(__cplusplus >= 201103L, "C++ standards below 2011 are not " @@ -104,30 +79,6 @@ namespace VarOps // Basics, using c++14 semantics in a c++11 compatible way, credit to libc++ // Forward -template -struct remove_reference { - typedef T type; -}; -template -struct remove_reference { - typedef T type; -}; -template -struct remove_reference { - typedef T type; -}; -template -RAJA_HOST_DEVICE RAJA_INLINE constexpr T&& forward( - typename remove_reference::type& t) noexcept -{ - return static_cast(t); -} -template -RAJA_HOST_DEVICE RAJA_INLINE constexpr T&& forward( - typename remove_reference::type&& t) noexcept -{ - return static_cast(t); -} // FoldL template @@ -149,12 +100,11 @@ template struct foldl_impl { - using Ret = - typename foldl_impl::type, - Arg3)>::type, - Rest...>::Ret; + using Ret = typename foldl_impl< + Op, + typename std::result_of::type, + Arg3)>::type, + Rest...>::Ret; }; template @@ -162,7 +112,7 @@ RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl( Op&& RAJA_UNUSED_ARG(operation), Arg1&& arg) -> typename foldl_impl::Ret { - return forward(arg); + return camp::forward(arg); } template @@ -171,7 +121,8 @@ RAJA_HOST_DEVICE RAJA_INLINE constexpr auto foldl(Op&& operation, Arg2&& arg2) -> typename foldl_impl::Ret { - return forward(operation)(forward(arg1), forward(arg2)); + return camp::forward(operation)(camp::forward(arg1), + camp::forward(arg2)); } template typename foldl_impl::Ret { - return foldl(forward(operation), - forward( - operation)(forward(operation)(forward(arg1), - forward(arg2)), - forward(arg3)), - forward(rest)...); + return foldl(camp::forward(operation), + camp::forward(operation)( + 
camp::forward(operation)(camp::forward(arg1), + camp::forward(arg2)), + camp::forward(arg3)), + camp::forward(rest)...); } struct adder { @@ -247,15 +198,6 @@ RAJA_HOST_DEVICE RAJA_INLINE constexpr Result max(Args... args) // : value() { } // }; -// Index sequence - -template -struct integer_sequence { - using type = integer_sequence; - static constexpr size_t size = sizeof...(Ints); - static constexpr std::array value{{Ints...}}; -}; - template