From 28031fcc36a7542702e8120d770398e46769dab8 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Fri, 9 Dec 2022 15:09:47 -0500 Subject: [PATCH 01/12] create and swap in RAJA_HOST_MAX_ALIGN --- include/RAJA/config.hpp.in | 3 ++- include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 404d8beebf..5b3e1060ba 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -480,7 +480,8 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable)) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N)) - +#define RAJA_HOST_MAX_ALIGN std::max_align_t +#define RAJA_DEVICE_MAX_ALIGN 8 // note that neither nvcc nor Apple Clang compiler currently doesn't support // the __builtin_assume_aligned attribute diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index 6bdd56a3c3..a9b531bda0 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -45,7 +45,7 @@ struct WorkStruct; * sizeof(GenericWorkStruct) <= sizeof(WorkStruct) */ template < typename Dispatcher_T > -using GenericWorkStruct = WorkStruct; +using GenericWorkStruct = WorkStruct; template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > struct WorkStruct> @@ -112,7 +112,7 @@ struct WorkStruct::type obj; + typename std::aligned_storage::type obj; }; } // namespace detail From 479d93ce400905f09465f36f7496f9df6a9f6096 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Fri, 9 Dec 2022 15:57:53 -0500 Subject: [PATCH 02/12] remove std:max_align_t entirely --- include/RAJA/config.hpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 5b3e1060ba..c9a8b09df0 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -480,7 +480,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable)) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N)) -#define RAJA_HOST_MAX_ALIGN std::max_align_t +#define RAJA_HOST_MAX_ALIGN 16 #define RAJA_DEVICE_MAX_ALIGN 8 // note that neither nvcc nor Apple Clang compiler currently doesn't support From f844352e97ca7807041df41bd01f8858ccdd7d58 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Fri, 9 Dec 2022 16:06:46 -0500 Subject: [PATCH 03/12] fix syntax error --- include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index a9b531bda0..aa2e0d1919 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -45,7 +45,7 @@ struct WorkStruct; * sizeof(GenericWorkStruct) <= sizeof(WorkStruct) */ template < typename Dispatcher_T > -using GenericWorkStruct = WorkStruct; +using GenericWorkStruct = WorkStruct; template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > struct WorkStruct> @@ -112,7 +112,7 @@ struct WorkStruct::type obj; + typename std::aligned_storage::type obj; }; } // namespace detail From 65891a0c111eaade6e090c640144075036d85087 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Fri, 9 Dec 2022 16:45:59 -0500 Subject: [PATCH 04/12] fix some compiler errors --- include/RAJA/config.hpp.in | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index c9a8b09df0..c879a6a911 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -387,6 +387,8 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #endif +#define RAJA_HOST_MAX_ALIGN 16 + #define RAJA_UNROLL RAJA_PRAGMA(unroll) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N)) @@ -414,6 +416,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) +#define RAJA_HOST_MAX_ALIGN 16 #if !defined(__NVCC__) #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000) @@ -450,11 +453,14 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) +#define RAJA_HOST_MAX_ALIGN 16 // FIXME: alignx is breaking CUDA+xlc #if defined(RAJA_ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) d +#define RAJA_DEVICE_MAX_ALIGN 16 #else +#define RAJA_DEVICE_MAX_ALIGN 16 #define RAJA_ALIGN_DATA(d) __alignx(RAJA::DATA_ALIGN, d) #endif @@ -481,13 +487,14 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable)) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N)) #define RAJA_HOST_MAX_ALIGN 16 -#define RAJA_DEVICE_MAX_ALIGN 8 // note that neither nvcc nor Apple Clang compiler currently doesn't support // the __builtin_assume_aligned attribute #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__) #define RAJA_ALIGN_DATA(d) d +#define RAJA_DEVICE_MAX_ALIGN 16 #else +#define RAJA_DEVICE_MAX_ALIGN 8 #define RAJA_ALIGN_DATA(d) __builtin_assume_aligned(d, RAJA::DATA_ALIGN) #endif @@ -524,9 +531,12 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) +#define RAJA_HOST_MAX_ALIGN 16 #else #pragma message("RAJA_COMPILER unknown, using default empty macros.") +#define RAJA_HOST_MAX_ALIGN 16 +#define RAJA_DEVICE_MAX_ALIGN 8 #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline #define RAJA_ALIGN_DATA(d) d From ce880377895970f2087f82c5513fad00676ab421 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Fri, 9 Dec 2022 20:05:04 -0500 Subject: [PATCH 05/12] continue to clean up compiler errors --- include/RAJA/config.hpp.in | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index c879a6a911..3babda0d93 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -388,6 +388,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #endif #define RAJA_HOST_MAX_ALIGN 16 +#define RAJA_DEVICE_MAX_ALIGN 16 #define RAJA_UNROLL RAJA_PRAGMA(unroll) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N)) @@ -417,7 +418,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_HOST_MAX_ALIGN 16 - +#if defined(RAJA_ENABLE_HIP) +#define RAJA_DEVICE_MAX_ALIGN 8 +#else +#define RAJA_HOST_MAX_ALIGN 16 +#endif #if !defined(__NVCC__) #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N) @@ -454,13 +459,12 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) #define RAJA_HOST_MAX_ALIGN 16 +#define RAJA_DEVICE_MAX_ALIGN 16 // FIXME: alignx is breaking CUDA+xlc #if defined(RAJA_ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) d -#define RAJA_DEVICE_MAX_ALIGN 16 #else -#define RAJA_DEVICE_MAX_ALIGN 16 #define RAJA_ALIGN_DATA(d) __alignx(RAJA::DATA_ALIGN, d) #endif @@ -487,14 +491,16 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable)) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N)) #define RAJA_HOST_MAX_ALIGN 16 - +#if defined(RAJA_ENABLE_HIP) +#define RAJA_DEVICE_MAX_ALIGN 8 +#else +#define RAJA_HOST_MAX_ALIGN 16 +#endif // note that neither nvcc nor Apple Clang compiler currently doesn't support // the __builtin_assume_aligned attribute #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__) #define RAJA_ALIGN_DATA(d) d -#define RAJA_DEVICE_MAX_ALIGN 16 #else -#define RAJA_DEVICE_MAX_ALIGN 8 #define RAJA_ALIGN_DATA(d) __builtin_assume_aligned(d, RAJA::DATA_ALIGN) #endif @@ -530,13 +536,13 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_NO_SIMD #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) - #define RAJA_HOST_MAX_ALIGN 16 +#define RAJA_DEVICE_MAX_ALIGN 16 #else #pragma message("RAJA_COMPILER unknown, using default empty macros.") #define RAJA_HOST_MAX_ALIGN 16 -#define RAJA_DEVICE_MAX_ALIGN 8 +#define RAJA_DEVICE_MAX_ALIGN 16 #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline #define RAJA_ALIGN_DATA(d) d From e35c488b652b7ee99865dde2727aaec9b4aa68d9 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Fri, 9 Dec 2022 20:17:45 -0500 Subject: [PATCH 06/12] clean up MSVC config --- include/RAJA/config.hpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 3babda0d93..44ad5bf946 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -536,7 +536,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_NO_SIMD #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) -#define RAJA_HOST_MAX_ALIGN 16 +#define RAJA_HOST_MAX_ALIGN alignof(std::max_align_t) #define RAJA_DEVICE_MAX_ALIGN 16 #else From c61154d772089b002e237d22a80fe1c869e7c9b5 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Mon, 12 Dec 2022 12:08:02 -0500 Subject: [PATCH 07/12] add a unified RAJA_MAX_ALIGN, add some comments documenting it reasoning, and add a static static_assert to validate it's use in WorkStruct --- include/RAJA/config.hpp.in | 35 ++++++++----------- include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 7 ++-- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 44ad5bf946..d8a0a85745 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -239,6 +239,15 @@ static_assert(RAJA_HAS_SOME_CXX14, #define RAJA_PRAGMA(x) _Pragma(RAJA_STRINGIFY(x)) #endif + +/* NOTE: Below we define RAJA_MAX_ALIGN for each compiler, currently it is set as 16 bytes +for all cases. Previously this was set by alignof(std::max_align_t) which, in Clang, +is based on the sizeof(long double). This causes an in inconsistency as CUDA/HIP long doubles +are demoted to doubles causing alignof(std::max_align_t) to return 8 bytes on the device and +16 bytes on the host. We therefore set a standard size and ensure validity through a +static_assert in pattern/WorkGroup/WorkStruct.hpp. +*/ + namespace RAJA { #if defined(RAJA_ENABLE_OPENMP) && !defined(__HIP_DEVICE_COMPILE__) @@ -387,8 +396,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #endif -#define RAJA_HOST_MAX_ALIGN 16 -#define RAJA_DEVICE_MAX_ALIGN 16 +#define RAJA_MAX_ALIGN 16 #define RAJA_UNROLL RAJA_PRAGMA(unroll) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N)) @@ -417,12 +425,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) -#define RAJA_HOST_MAX_ALIGN 16 -#if defined(RAJA_ENABLE_HIP) -#define RAJA_DEVICE_MAX_ALIGN 8 -#else -#define RAJA_HOST_MAX_ALIGN 16 -#endif +#define RAJA_HOST_ALIGN 16 #if !defined(__NVCC__) #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N) @@ -458,8 +461,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) -#define RAJA_HOST_MAX_ALIGN 16 -#define RAJA_DEVICE_MAX_ALIGN 16 +#define RAJA_MAX_ALIGN 16 // FIXME: alignx is breaking CUDA+xlc #if defined(RAJA_ENABLE_CUDA) @@ -490,12 +492,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable)) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N)) -#define RAJA_HOST_MAX_ALIGN 16 -#if defined(RAJA_ENABLE_HIP) -#define RAJA_DEVICE_MAX_ALIGN 8 -#else -#define RAJA_HOST_MAX_ALIGN 16 -#endif +#define RAJA_MAX_ALIGN 16 // note that neither nvcc nor Apple Clang compiler currently doesn't support // the __builtin_assume_aligned attribute #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__) @@ -536,13 +533,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_NO_SIMD #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) -#define RAJA_HOST_MAX_ALIGN alignof(std::max_align_t) -#define RAJA_DEVICE_MAX_ALIGN 16 +#define RAJA_MAX_ALIGN 16 #else #pragma message("RAJA_COMPILER unknown, using default empty macros.") -#define RAJA_HOST_MAX_ALIGN 16 -#define RAJA_DEVICE_MAX_ALIGN 16 +#define RAJA_MAX_ALIGN 16 #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline #define RAJA_ALIGN_DATA(d) d diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index aa2e0d1919..b553f2167e 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -45,7 +45,7 @@ struct WorkStruct; * sizeof(GenericWorkStruct) <= sizeof(WorkStruct) */ template < typename Dispatcher_T > -using GenericWorkStruct = WorkStruct; +using GenericWorkStruct = WorkStruct; template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs > struct WorkStruct> @@ -71,7 +71,8 @@ struct WorkStruct= alignof(std::max_align_t) && (16/alignof(std::max_align_t))*alignof(std::max_align_t) == 16, + "WorkStruct max align value must be 16 bytes"); true_value_type* value_ptr = static_cast(ptr); value_ptr->dispatcher = dispatcher; @@ -112,7 +113,7 @@ struct WorkStruct::type obj; + typename std::aligned_storage::type obj; }; } // namespace detail From 830697962a547870aca5a218769db7d690f19b86 Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Mon, 12 Dec 2022 12:14:47 -0500 Subject: [PATCH 08/12] fix typo --- include/RAJA/config.hpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index d8a0a85745..2c4109cc3d 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -425,7 +425,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) -#define RAJA_HOST_ALIGN 16 +#define RAJA_MAX_ALIGN 16 #if !defined(__NVCC__) #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N) From f723958d91fd400a8c0307aadb0c2bcd4adda5ff Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Mon, 12 Dec 2022 12:37:03 -0500 Subject: [PATCH 09/12] revert RAJA_MAX_ALIGN for MSVC compiler --- include/RAJA/config.hpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 2c4109cc3d..9939281932 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -533,7 +533,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_NO_SIMD #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) -#define RAJA_MAX_ALIGN 16 +#define RAJA_MAX_ALIGN alignof(std::max_align_t) #else #pragma message("RAJA_COMPILER unknown, using default empty macros.") From 47c9c935ccd5fb52b3f3a714b60a0137366b704c Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Mon, 12 Dec 2022 13:38:57 -0500 Subject: [PATCH 10/12] move static_asset for alignment out of WorkStruct.hpp and into config.hpp.in, swap out hard coded value for RAJA_MAX_ALIGN --- include/RAJA/config.hpp.in | 6 ++++-- include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 9939281932..d2965d4d3a 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -32,6 +32,7 @@ #define RAJA_config_HPP #include +#include #include #if defined(_MSVC_LANG) @@ -462,7 +463,6 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) #define RAJA_MAX_ALIGN 16 - // FIXME: alignx is breaking CUDA+xlc #if defined(RAJA_ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) d @@ -534,6 +534,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) #define RAJA_MAX_ALIGN alignof(std::max_align_t) + #else #pragma message("RAJA_COMPILER unknown, using default empty macros.") @@ -547,7 +548,8 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL_COUNT(N) #endif - +static_assert(RAJA_MAX_ALIGN >= alignof(std::max_align_t) && (RAJA_MAX_ALIGN/alignof(std::max_align_t))*alignof(std::max_align_t) == RAJA_MAX_ALIGN, + "Inconsistent RAJA_MAX_ALIGN size"); #cmakedefine RAJA_HAVE_POSIX_MEMALIGN #cmakedefine RAJA_HAVE_ALIGNED_ALLOC #cmakedefine RAJA_HAVE_MM_MALLOC diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp index b553f2167e..c24e09fc53 100644 --- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp +++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp @@ -71,8 +71,6 @@ struct WorkStruct= alignof(std::max_align_t) && (16/alignof(std::max_align_t))*alignof(std::max_align_t) == 16, - "WorkStruct max align value must be 16 bytes"); true_value_type* value_ptr = static_cast(ptr); value_ptr->dispatcher = dispatcher; From 4fdf583e19958a8f8a982844fd3bb96785653b5a Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Mon, 12 Dec 2022 13:52:03 -0500 Subject: [PATCH 11/12] move RAJA_MAX_ALIGN to be in consistent ordering in compiler directives --- include/RAJA/config.hpp.in | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index d2965d4d3a..3565066c6a 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -384,7 +384,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // // Configuration options for Intel compilers // - +#define RAJA_MAX_ALIGN 16 #if defined (RAJA_ENABLE_FORCEINLINE_RECURSIVE) #define RAJA_FORCEINLINE_RECURSIVE RAJA_PRAGMA(forceinline recursive) #else @@ -397,7 +397,6 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_INLINE inline __attribute__((always_inline)) #endif -#define RAJA_MAX_ALIGN 16 #define RAJA_UNROLL RAJA_PRAGMA(unroll) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N)) @@ -424,9 +423,9 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // // Configuration options for GNU compilers // +#define RAJA_MAX_ALIGN 16 #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) -#define RAJA_MAX_ALIGN 16 #if !defined(__NVCC__) #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N) @@ -458,11 +457,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // // Configuration options for xlc compiler (i.e., bgq/sequoia). // +#define RAJA_MAX_ALIGN 16 #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) -#define RAJA_MAX_ALIGN 16 // FIXME: alignx is breaking CUDA+xlc #if defined(RAJA_ENABLE_CUDA) #define RAJA_ALIGN_DATA(d) d @@ -488,11 +487,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // // Configuration options for clang compilers // +#define RAJA_MAX_ALIGN 16 #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline __attribute__((always_inline)) #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable)) #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N)) -#define RAJA_MAX_ALIGN 16 // note that neither nvcc nor Apple Clang compiler currently doesn't support // the __builtin_assume_aligned attribute #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__) @@ -525,7 +524,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; // This is the same as undefined compiler, but squelches the warning message #elif defined(RAJA_COMPILER_MSVC) - +#define RAJA_MAX_ALIGN alignof(std::max_align_t) #define RAJA_FORCEINLINE_RECURSIVE #define RAJA_INLINE inline #define RAJA_ALIGN_DATA(d) d @@ -533,7 +532,6 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_NO_SIMD #define RAJA_UNROLL #define RAJA_UNROLL_COUNT(N) -#define RAJA_MAX_ALIGN alignof(std::max_align_t) #else @@ -548,8 +546,10 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@; #define RAJA_UNROLL_COUNT(N) #endif + static_assert(RAJA_MAX_ALIGN >= alignof(std::max_align_t) && (RAJA_MAX_ALIGN/alignof(std::max_align_t))*alignof(std::max_align_t) == RAJA_MAX_ALIGN, "Inconsistent RAJA_MAX_ALIGN size"); + #cmakedefine RAJA_HAVE_POSIX_MEMALIGN #cmakedefine RAJA_HAVE_ALIGNED_ALLOC #cmakedefine RAJA_HAVE_MM_MALLOC From 5d458cef0c1e9e9f99e4f9dda341f7de262e6f21 Mon Sep 17 00:00:00 2001 From: Corbin Robeck <13821049+CRobeck@users.noreply.github.com> Date: Tue, 13 Dec 2022 17:34:47 -0500 Subject: [PATCH 12/12] Update comments about RAJA_MAX_ALIGN --- include/RAJA/config.hpp.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 3565066c6a..037ec13e2a 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -242,11 +242,11 @@ static_assert(RAJA_HAS_SOME_CXX14, /* NOTE: Below we define RAJA_MAX_ALIGN for each compiler, currently it is set as 16 bytes -for all cases. Previously this was set by alignof(std::max_align_t) which, in Clang, +for all cases, except MSVC. Previously this was set by alignof(std::max_align_t) which, in Clang, is based on the sizeof(long double). This causes an in inconsistency as CUDA/HIP long doubles are demoted to doubles causing alignof(std::max_align_t) to return 8 bytes on the device and 16 bytes on the host. We therefore set a standard size and ensure validity through a -static_assert in pattern/WorkGroup/WorkStruct.hpp. +static_assert. */ namespace RAJA {