From 28031fcc36a7542702e8120d770398e46769dab8 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Fri, 9 Dec 2022 15:09:47 -0500
Subject: [PATCH 01/12] create and swap in RAJA_HOST_MAX_ALIGN

---
 include/RAJA/config.hpp.in                    | 3 ++-
 include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 404d8beebf..5b3e1060ba 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -480,7 +480,8 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
-
+#define RAJA_HOST_MAX_ALIGN std::max_align_t
+#define RAJA_DEVICE_MAX_ALIGN 8
 
 // note that neither nvcc nor Apple Clang compiler currently doesn't support
 // the __builtin_assume_aligned attribute
diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
index 6bdd56a3c3..a9b531bda0 100644
--- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
+++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
@@ -45,7 +45,7 @@ struct WorkStruct;
  *   sizeof(GenericWorkStruct) <= sizeof(WorkStruct<size>)
  */
 template < typename Dispatcher_T >
-using GenericWorkStruct = WorkStruct<alignof(std::max_align_t), Dispatcher_T>;
+using GenericWorkStruct = WorkStruct<alignof(RAJA_HOST_MAX_ALIGN), Dispatcher_T>;
 
 template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs >
 struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, CallArgs...>>
@@ -112,7 +112,7 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call
 
   const dispatcher_type* dispatcher;
   typename dispatcher_type::invoker_type invoke;
-  typename std::aligned_storage<size, alignof(std::max_align_t)>::type obj;
+  typename std::aligned_storage<size, alignof(RAJA_HOST_MAX_ALIGN)>::type obj;
 };
 
 }  // namespace detail

From 479d93ce400905f09465f36f7496f9df6a9f6096 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Fri, 9 Dec 2022 15:57:53 -0500
Subject: [PATCH 02/12] remove std::max_align_t entirely

---
 include/RAJA/config.hpp.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 5b3e1060ba..c9a8b09df0 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -480,7 +480,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
-#define RAJA_HOST_MAX_ALIGN std::max_align_t
+#define RAJA_HOST_MAX_ALIGN 16
 #define RAJA_DEVICE_MAX_ALIGN 8
 
 // note that neither nvcc nor Apple Clang compiler currently doesn't support

From f844352e97ca7807041df41bd01f8858ccdd7d58 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Fri, 9 Dec 2022 16:06:46 -0500
Subject: [PATCH 03/12] fix syntax error

---
 include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
index a9b531bda0..aa2e0d1919 100644
--- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
+++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
@@ -45,7 +45,7 @@ struct WorkStruct;
  *   sizeof(GenericWorkStruct) <= sizeof(WorkStruct<size>)
  */
 template < typename Dispatcher_T >
-using GenericWorkStruct = WorkStruct<alignof(RAJA_HOST_MAX_ALIGN), Dispatcher_T>;
+using GenericWorkStruct = WorkStruct<RAJA_HOST_MAX_ALIGN, Dispatcher_T>;
 
 template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs >
 struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, CallArgs...>>
@@ -112,7 +112,7 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call
 
   const dispatcher_type* dispatcher;
   typename dispatcher_type::invoker_type invoke;
-  typename std::aligned_storage<size, alignof(RAJA_HOST_MAX_ALIGN)>::type obj;
+  typename std::aligned_storage<size, RAJA_HOST_MAX_ALIGN>::type obj;
 };
 
 }  // namespace detail

From 65891a0c111eaade6e090c640144075036d85087 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Fri, 9 Dec 2022 16:45:59 -0500
Subject: [PATCH 04/12] fix some compiler errors

---
 include/RAJA/config.hpp.in | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index c9a8b09df0..c879a6a911 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -387,6 +387,8 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #endif
 
+#define RAJA_HOST_MAX_ALIGN 16
+
 #define RAJA_UNROLL RAJA_PRAGMA(unroll)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N))
 
@@ -414,6 +416,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
+#define RAJA_HOST_MAX_ALIGN 16
 
 #if !defined(__NVCC__)
 #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
@@ -450,11 +453,14 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
+#define RAJA_HOST_MAX_ALIGN 16
 
 // FIXME: alignx is breaking CUDA+xlc
 #if defined(RAJA_ENABLE_CUDA)
 #define RAJA_ALIGN_DATA(d) d
+#define RAJA_DEVICE_MAX_ALIGN 16
 #else
+#define RAJA_DEVICE_MAX_ALIGN 16
 #define RAJA_ALIGN_DATA(d) __alignx(RAJA::DATA_ALIGN, d)
 #endif
 
@@ -481,13 +487,14 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
 #define RAJA_HOST_MAX_ALIGN 16
-#define RAJA_DEVICE_MAX_ALIGN 8
 
 // note that neither nvcc nor Apple Clang compiler currently doesn't support
 // the __builtin_assume_aligned attribute
 #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__)
 #define RAJA_ALIGN_DATA(d) d
+#define RAJA_DEVICE_MAX_ALIGN 16
 #else
+#define RAJA_DEVICE_MAX_ALIGN 8
 #define RAJA_ALIGN_DATA(d) __builtin_assume_aligned(d, RAJA::DATA_ALIGN)
 #endif
 
@@ -524,9 +531,12 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
 
+#define RAJA_HOST_MAX_ALIGN 16
 #else
 
 #pragma message("RAJA_COMPILER unknown, using default empty macros.")
+#define RAJA_HOST_MAX_ALIGN 16
+#define RAJA_DEVICE_MAX_ALIGN 8
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline
 #define RAJA_ALIGN_DATA(d) d

From ce880377895970f2087f82c5513fad00676ab421 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Fri, 9 Dec 2022 20:05:04 -0500
Subject: [PATCH 05/12] continue to clean up compiler errors

---
 include/RAJA/config.hpp.in | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index c879a6a911..3babda0d93 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -388,6 +388,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #endif
 
 #define RAJA_HOST_MAX_ALIGN 16
+#define RAJA_DEVICE_MAX_ALIGN 16
 
 #define RAJA_UNROLL RAJA_PRAGMA(unroll)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N))
@@ -417,7 +418,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_HOST_MAX_ALIGN 16
-
+#if defined(RAJA_ENABLE_HIP)
+#define RAJA_DEVICE_MAX_ALIGN 8
+#else
+#define RAJA_HOST_MAX_ALIGN 16
+#endif
 #if !defined(__NVCC__)
 #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N)
@@ -454,13 +459,12 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
 #define RAJA_HOST_MAX_ALIGN 16
+#define RAJA_DEVICE_MAX_ALIGN 16
 
 // FIXME: alignx is breaking CUDA+xlc
 #if defined(RAJA_ENABLE_CUDA)
 #define RAJA_ALIGN_DATA(d) d
-#define RAJA_DEVICE_MAX_ALIGN 16
 #else
-#define RAJA_DEVICE_MAX_ALIGN 16
 #define RAJA_ALIGN_DATA(d) __alignx(RAJA::DATA_ALIGN, d)
 #endif
 
@@ -487,14 +491,16 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
 #define RAJA_HOST_MAX_ALIGN 16
-
+#if defined(RAJA_ENABLE_HIP)
+#define RAJA_DEVICE_MAX_ALIGN 8
+#else
+#define RAJA_HOST_MAX_ALIGN 16
+#endif
 // note that neither nvcc nor Apple Clang compiler currently doesn't support
 // the __builtin_assume_aligned attribute
 #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__)
 #define RAJA_ALIGN_DATA(d) d
-#define RAJA_DEVICE_MAX_ALIGN 16
 #else
-#define RAJA_DEVICE_MAX_ALIGN 8
 #define RAJA_ALIGN_DATA(d) __builtin_assume_aligned(d, RAJA::DATA_ALIGN)
 #endif
 
@@ -530,13 +536,13 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_NO_SIMD
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-
 #define RAJA_HOST_MAX_ALIGN 16
+#define RAJA_DEVICE_MAX_ALIGN 16
 #else
 
 #pragma message("RAJA_COMPILER unknown, using default empty macros.")
 #define RAJA_HOST_MAX_ALIGN 16
-#define RAJA_DEVICE_MAX_ALIGN 8
+#define RAJA_DEVICE_MAX_ALIGN 16
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline
 #define RAJA_ALIGN_DATA(d) d

From e35c488b652b7ee99865dde2727aaec9b4aa68d9 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Fri, 9 Dec 2022 20:17:45 -0500
Subject: [PATCH 06/12] clean up MSVC config

---
 include/RAJA/config.hpp.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 3babda0d93..44ad5bf946 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -536,7 +536,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_NO_SIMD
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-#define RAJA_HOST_MAX_ALIGN 16
+#define RAJA_HOST_MAX_ALIGN alignof(std::max_align_t)
 #define RAJA_DEVICE_MAX_ALIGN 16
 #else
 

From c61154d772089b002e237d22a80fe1c869e7c9b5 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Mon, 12 Dec 2022 12:08:02 -0500
Subject: [PATCH 07/12] add a unified RAJA_MAX_ALIGN, add some comments
 documenting its reasoning, and add a static_assert to validate its use
 in WorkStruct

---
 include/RAJA/config.hpp.in                    | 35 ++++++++-----------
 include/RAJA/pattern/WorkGroup/WorkStruct.hpp |  7 ++--
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 44ad5bf946..d8a0a85745 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -239,6 +239,15 @@ static_assert(RAJA_HAS_SOME_CXX14,
 #define RAJA_PRAGMA(x) _Pragma(RAJA_STRINGIFY(x))
 #endif
 
+
+/* NOTE: Below we define RAJA_MAX_ALIGN for each compiler, currently it is set as 16 bytes
+for all cases. Previously this was set by alignof(std::max_align_t) which, in Clang,
+is based on the sizeof(long double). This causes an in inconsistency as CUDA/HIP long doubles 
+are demoted to doubles causing alignof(std::max_align_t) to return 8 bytes on the device and
+16 bytes on the host. We therefore set a standard size and ensure validity through a 
+static_assert in pattern/WorkGroup/WorkStruct.hpp.
+*/
+
 namespace RAJA {
 
 #if defined(RAJA_ENABLE_OPENMP) && !defined(__HIP_DEVICE_COMPILE__)
@@ -387,8 +396,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #endif
 
-#define RAJA_HOST_MAX_ALIGN 16
-#define RAJA_DEVICE_MAX_ALIGN 16
+#define RAJA_MAX_ALIGN 16
 
 #define RAJA_UNROLL RAJA_PRAGMA(unroll)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N))
@@ -417,12 +425,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
-#define RAJA_HOST_MAX_ALIGN 16
-#if defined(RAJA_ENABLE_HIP)
-#define RAJA_DEVICE_MAX_ALIGN 8
-#else
-#define RAJA_HOST_MAX_ALIGN 16
-#endif
+#define RAJA_HOST_ALIGN 16
 #if !defined(__NVCC__)
 #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N)
@@ -458,8 +461,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-#define RAJA_HOST_MAX_ALIGN 16
-#define RAJA_DEVICE_MAX_ALIGN 16
+#define RAJA_MAX_ALIGN 16
 
 // FIXME: alignx is breaking CUDA+xlc
 #if defined(RAJA_ENABLE_CUDA)
@@ -490,12 +492,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
-#define RAJA_HOST_MAX_ALIGN 16
-#if defined(RAJA_ENABLE_HIP)
-#define RAJA_DEVICE_MAX_ALIGN 8
-#else
-#define RAJA_HOST_MAX_ALIGN 16
-#endif
+#define RAJA_MAX_ALIGN 16
 // note that neither nvcc nor Apple Clang compiler currently doesn't support
 // the __builtin_assume_aligned attribute
 #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__)
@@ -536,13 +533,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_NO_SIMD
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-#define RAJA_HOST_MAX_ALIGN alignof(std::max_align_t)
-#define RAJA_DEVICE_MAX_ALIGN 16
+#define RAJA_MAX_ALIGN 16
 #else
 
 #pragma message("RAJA_COMPILER unknown, using default empty macros.")
-#define RAJA_HOST_MAX_ALIGN 16
-#define RAJA_DEVICE_MAX_ALIGN 16
+#define RAJA_MAX_ALIGN 16
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline
 #define RAJA_ALIGN_DATA(d) d
diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
index aa2e0d1919..b553f2167e 100644
--- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
+++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
@@ -45,7 +45,7 @@ struct WorkStruct;
  *   sizeof(GenericWorkStruct) <= sizeof(WorkStruct<size>)
  */
 template < typename Dispatcher_T >
-using GenericWorkStruct = WorkStruct<RAJA_HOST_MAX_ALIGN, Dispatcher_T>;
+using GenericWorkStruct = WorkStruct<RAJA_MAX_ALIGN, Dispatcher_T>;
 
 template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs >
 struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, CallArgs...>>
@@ -71,7 +71,8 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call
         "WorkStruct and GenericWorkStruct must have obj at the same offset");
     static_assert(sizeof(value_type) <= sizeof(true_value_type),
         "WorkStruct must not be smaller than GenericWorkStruct");
-
+    static_assert(16 >= alignof(std::max_align_t) && (16/alignof(std::max_align_t))*alignof(std::max_align_t) == 16,
+			  "WorkStruct max align value must be 16 bytes");
     true_value_type* value_ptr = static_cast<true_value_type*>(ptr);
 
     value_ptr->dispatcher = dispatcher;
@@ -112,7 +113,7 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call
 
   const dispatcher_type* dispatcher;
   typename dispatcher_type::invoker_type invoke;
-  typename std::aligned_storage<size, RAJA_HOST_MAX_ALIGN>::type obj;
+  typename std::aligned_storage<size, RAJA_MAX_ALIGN>::type obj;
 };
 
 }  // namespace detail

From 830697962a547870aca5a218769db7d690f19b86 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Mon, 12 Dec 2022 12:14:47 -0500
Subject: [PATCH 08/12] fix typo

---
 include/RAJA/config.hpp.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index d8a0a85745..2c4109cc3d 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -425,7 +425,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
-#define RAJA_HOST_ALIGN 16
+#define RAJA_MAX_ALIGN 16
 #if !defined(__NVCC__)
 #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N)

From f723958d91fd400a8c0307aadb0c2bcd4adda5ff Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Mon, 12 Dec 2022 12:37:03 -0500
Subject: [PATCH 09/12] revert RAJA_MAX_ALIGN for MSVC compiler

---
 include/RAJA/config.hpp.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 2c4109cc3d..9939281932 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -533,7 +533,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_NO_SIMD
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-#define RAJA_MAX_ALIGN 16
+#define RAJA_MAX_ALIGN alignof(std::max_align_t)
 #else
 
 #pragma message("RAJA_COMPILER unknown, using default empty macros.")

From 47c9c935ccd5fb52b3f3a714b60a0137366b704c Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Mon, 12 Dec 2022 13:38:57 -0500
Subject: [PATCH 10/12] move static_assert for alignment out of WorkStruct.hpp
 and into config.hpp.in, swap out hard-coded value for RAJA_MAX_ALIGN

---
 include/RAJA/config.hpp.in                    | 6 ++++--
 include/RAJA/pattern/WorkGroup/WorkStruct.hpp | 2 --
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 9939281932..d2965d4d3a 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -32,6 +32,7 @@
 #define RAJA_config_HPP
 
 #include <utility>
+#include <cstddef>
 #include <type_traits>
 
 #if defined(_MSVC_LANG)
@@ -462,7 +463,6 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
 #define RAJA_MAX_ALIGN 16
-
 // FIXME: alignx is breaking CUDA+xlc
 #if defined(RAJA_ENABLE_CUDA)
 #define RAJA_ALIGN_DATA(d) d
@@ -534,6 +534,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
 #define RAJA_MAX_ALIGN alignof(std::max_align_t)
+
 #else
 
 #pragma message("RAJA_COMPILER unknown, using default empty macros.")
@@ -547,7 +548,8 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL_COUNT(N)
 
 #endif
-
+static_assert(RAJA_MAX_ALIGN >= alignof(std::max_align_t) && (RAJA_MAX_ALIGN/alignof(std::max_align_t))*alignof(std::max_align_t) == RAJA_MAX_ALIGN, 
+        "Inconsistent RAJA_MAX_ALIGN size");
 #cmakedefine RAJA_HAVE_POSIX_MEMALIGN
 #cmakedefine RAJA_HAVE_ALIGNED_ALLOC
 #cmakedefine RAJA_HAVE_MM_MALLOC
diff --git a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
index b553f2167e..c24e09fc53 100644
--- a/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
+++ b/include/RAJA/pattern/WorkGroup/WorkStruct.hpp
@@ -71,8 +71,6 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call
         "WorkStruct and GenericWorkStruct must have obj at the same offset");
     static_assert(sizeof(value_type) <= sizeof(true_value_type),
         "WorkStruct must not be smaller than GenericWorkStruct");
-    static_assert(16 >= alignof(std::max_align_t) && (16/alignof(std::max_align_t))*alignof(std::max_align_t) == 16,
-			  "WorkStruct max align value must be 16 bytes");
     true_value_type* value_ptr = static_cast<true_value_type*>(ptr);
 
     value_ptr->dispatcher = dispatcher;

From 4fdf583e19958a8f8a982844fd3bb96785653b5a Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck@amd.com>
Date: Mon, 12 Dec 2022 13:52:03 -0500
Subject: [PATCH 11/12] move RAJA_MAX_ALIGN to be in consistent ordering in
 compiler directives

---
 include/RAJA/config.hpp.in | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index d2965d4d3a..3565066c6a 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -384,7 +384,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 // Configuration options for Intel compilers
 //
-
+#define RAJA_MAX_ALIGN 16
 #if defined (RAJA_ENABLE_FORCEINLINE_RECURSIVE)
 #define RAJA_FORCEINLINE_RECURSIVE  RAJA_PRAGMA(forceinline recursive)
 #else
@@ -397,7 +397,6 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #endif
 
-#define RAJA_MAX_ALIGN 16
 
 #define RAJA_UNROLL RAJA_PRAGMA(unroll)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N))
@@ -424,9 +423,9 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 // Configuration options for GNU compilers
 //
+#define RAJA_MAX_ALIGN 16
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
-#define RAJA_MAX_ALIGN 16
 #if !defined(__NVCC__)
 #define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N)
@@ -458,11 +457,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 // Configuration options for xlc compiler (i.e., bgq/sequoia).
 //
+#define RAJA_MAX_ALIGN 16
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-#define RAJA_MAX_ALIGN 16
 // FIXME: alignx is breaking CUDA+xlc
 #if defined(RAJA_ENABLE_CUDA)
 #define RAJA_ALIGN_DATA(d) d
@@ -488,11 +487,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 //
 // Configuration options for clang compilers
 //
+#define RAJA_MAX_ALIGN 16
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline  __attribute__((always_inline))
 #define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
 #define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
-#define RAJA_MAX_ALIGN 16
 // note that neither nvcc nor Apple Clang compiler currently doesn't support
 // the __builtin_assume_aligned attribute
 #if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__)
@@ -525,7 +524,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 
 // This is the same as undefined compiler, but squelches the warning message
 #elif defined(RAJA_COMPILER_MSVC)
-
+#define RAJA_MAX_ALIGN alignof(std::max_align_t)
 #define RAJA_FORCEINLINE_RECURSIVE
 #define RAJA_INLINE inline
 #define RAJA_ALIGN_DATA(d) d
@@ -533,7 +532,6 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_NO_SIMD
 #define RAJA_UNROLL
 #define RAJA_UNROLL_COUNT(N)
-#define RAJA_MAX_ALIGN alignof(std::max_align_t)
 
 #else
 
@@ -548,8 +546,10 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
 #define RAJA_UNROLL_COUNT(N)
 
 #endif
+
 static_assert(RAJA_MAX_ALIGN >= alignof(std::max_align_t) && (RAJA_MAX_ALIGN/alignof(std::max_align_t))*alignof(std::max_align_t) == RAJA_MAX_ALIGN, 
         "Inconsistent RAJA_MAX_ALIGN size");
+
 #cmakedefine RAJA_HAVE_POSIX_MEMALIGN
 #cmakedefine RAJA_HAVE_ALIGNED_ALLOC
 #cmakedefine RAJA_HAVE_MM_MALLOC

From 5d458cef0c1e9e9f99e4f9dda341f7de262e6f21 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <13821049+CRobeck@users.noreply.github.com>
Date: Tue, 13 Dec 2022 17:34:47 -0500
Subject: [PATCH 12/12] Update comments about RAJA_MAX_ALIGN

---
 include/RAJA/config.hpp.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in
index 3565066c6a..037ec13e2a 100644
--- a/include/RAJA/config.hpp.in
+++ b/include/RAJA/config.hpp.in
@@ -242,11 +242,11 @@ static_assert(RAJA_HAS_SOME_CXX14,
 
 
 /* NOTE: Below we define RAJA_MAX_ALIGN for each compiler, currently it is set as 16 bytes
-for all cases. Previously this was set by alignof(std::max_align_t) which, in Clang,
+for all cases, except MSVC. Previously this was set by alignof(std::max_align_t) which, in Clang,
 is based on the sizeof(long double). This causes an in inconsistency as CUDA/HIP long doubles 
 are demoted to doubles causing alignof(std::max_align_t) to return 8 bytes on the device and
 16 bytes on the host. We therefore set a standard size and ensure validity through a 
-static_assert in pattern/WorkGroup/WorkStruct.hpp.
+static_assert.
 */
 
 namespace RAJA {