diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index d097955478..053fed2a0d 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -281,15 +281,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) { oneapi::dpl::__ranges::__require_access(__cgh, __keys); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReduceCountKernel>( - sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=]( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_count_kernel, -#endif - sycl::nd_item<1> __item) { + sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); ::std::size_t __group_id = __item.get_group(0); ::std::size_t __local_id = __item.get_local_id(0); @@ -319,13 +315,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __cgh.depends_on(__seg_end_identification); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReduceOffsetKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_offset_kernel, -#endif sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_acc); auto __out_beg = __dpl_sycl::__get_accessor_ptr(__seg_ends_scan_acc); @@ -342,13 +335,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __partials_acc = __partials.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReduceWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_wg_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { ::std::array<__val_type, __vals_per_item> __loc_partials; @@ -465,13 +455,10 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh); __cgh.depends_on(__wg_reduce); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegReducePrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_reduce_prefix_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); ::std::int64_t __group_id = __item.get_group(0); diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index b895561bae..224c6744ff 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -164,13 +164,10 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegScanWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_scan_wg_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { __val_type __accumulator = __identity; @@ -268,13 +265,10 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh); __dpl_sycl::__local_accessor __loc_seg_ends_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +# if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle()); -#endif +# endif __cgh.parallel_for<_SegScanPrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __seg_scan_prefix_kernel, -#endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { auto __group = __item.get_group(); ::std::size_t __group_id = __item.get_group(0); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index c0f3e9ad2d..3f72a42e9f 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -321,13 +321,10 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name oneapi::dpl::__ranges::__require_access(__cgh, __rng1, __rng2); //get an access to data under SYCL buffer auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle()); #endif __cgh.parallel_for<_LocalScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel_1, -#endif sycl::nd_range<1>(__n_groups * __wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) { auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc); __local_scan(__item, __n, __local_acc, __rng1, __rng2, __temp_ptr, __size_per_wg, __wgroup_size, @@ -342,13 +339,10 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name __cgh.depends_on(__submit_event); auto __temp_acc = __result_and_scratch.__get_scratch_acc(__cgh); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle()); #endif __cgh.parallel_for<_GroupScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel_2, -#endif // TODO: try to balance work between several workgroups instead of one sycl::nd_range<1>(__wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) { auto __temp_ptr = __result_and_scratch_storage_t::__get_usm_or_buffer_accessor_ptr(__temp_acc); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index b6ee2c4f3b..e821a7b795 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -198,13 +198,10 @@ __radix_sort_count_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, : oneapi::dpl::__ranges::__require_access(__hdl, __val_rng, __count_rng); // an accessor per work-group with value counters from each work-item auto __count_lacc = __dpl_sycl::__local_accessor<_CountT>(__wg_size * __radix_states, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif sycl::nd_range<1>(__segments * __wg_size, __wg_size), [=](sycl::nd_item<1> __self_item) { // item info const ::std::size_t __self_lidx = __self_item.get_local_id(0); @@ -299,13 +296,10 @@ __radix_sort_scan_submit(_ExecutionPolicy&& __exec, ::std::size_t __scan_wg_size __hdl.depends_on(__dependency_event); // access the counters for all work groups oneapi::dpl::__ranges::__require_access(__hdl, __count_rng); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif sycl::nd_range<1>(__radix_states * __scan_wg_size, __scan_wg_size), [=](sycl::nd_item<1> __self_item) { // find borders of a region with a specific bucket id sycl::global_ptr<_CountT> __begin = __count_rng.begin() + __scan_size * __self_item.get_group(0); @@ -346,7 +340,6 @@ enum class __peer_prefix_algo template struct __peer_prefix_helper; -#if (_ONEDPL_LIBSYCL_VERSION >= 50700) template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic_fetch_or> { @@ -390,7 +383,6 @@ struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic return __offset; } }; -#endif // (_ONEDPL_LIBSYCL_VERSION >= 50700) template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::scan_then_broadcast> @@ -544,13 +536,10 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, typename _PeerHelper::_TempStorageT __peer_temp(1, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif //Each SYCL work group processes one data segment. sycl::nd_range<1>(__segments * __sg_size, __sg_size), [=](sycl::nd_item<1> __self_item) { @@ -607,7 +596,11 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, if (__residual > 0) { //_ValueT may not have a default constructor, so we create just a storage via union type - union __storage { _ValueT __v; __storage(){} } __in_val; + union __storage + { + _ValueT __v; + __storage() {} + } __in_val; ::std::uint32_t __bucket = __radix_states; // greater than any actual radix state if (__self_lidx < __residual) @@ -728,10 +721,8 @@ struct __parallel_radix_sort_iteration { #if _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT constexpr auto __peer_algorithm = __peer_prefix_algo::subgroup_ballot; -#elif _ONEDPL_LIBSYCL_VERSION >= 50700 - constexpr auto __peer_algorithm = __peer_prefix_algo::atomic_fetch_or; #else - constexpr auto __peer_algorithm = __peer_prefix_algo::scan_then_broadcast; + constexpr auto __peer_algorithm = __peer_prefix_algo::atomic_fetch_or; #endif // _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT __reorder_event = diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index ca776e94dc..4c9974417b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -365,13 +365,10 @@ struct __parallel_transform_reduce_impl oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size); __dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL __cgh.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_ReduceKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT - __kernel, -#endif sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)), [=](sycl::nd_item<1> __item_id) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index 9bd195a80a..519cd42c51 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -108,19 +108,12 @@ __supports_sub_group_size(const _ExecutionPolicy& __exec, std::size_t __target_s // Kernel run-time information helpers //----------------------------------------------------------------------------- -// 20201214 value corresponds to Intel(R) oneAPI C++ Compiler Classic 2021.1.2 Patch release -#define _USE_KERNEL_DEVICE_SPECIFIC_API (__SYCL_COMPILER_VERSION > 20201214) || (_ONEDPL_LIBSYCL_VERSION >= 50700) - template ::std::size_t __kernel_work_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __kernel) { const sycl::device& __device = __policy.queue().get_device(); -#if _USE_KERNEL_DEVICE_SPECIFIC_API return __kernel.template get_info(__device); -#else - return __kernel.template get_work_group_info(__device); -#endif } template @@ -130,7 +123,6 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __ const sycl::device& __device = __policy.queue().get_device(); [[maybe_unused]] const ::std::size_t __wg_size = __kernel_work_group_size(__policy, __kernel); const ::std::uint32_t __sg_size = -#if _USE_KERNEL_DEVICE_SPECIFIC_API __kernel.template get_info( __device # if _ONEDPL_LIBSYCL_VERSION < 60000 @@ -138,9 +130,6 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __ sycl::range<3> { __wg_size, 1, 1 } # endif ); -#else - __kernel.template get_sub_group_info( - __device, sycl::range<3>{__wg_size, 1, 1}); #endif return __sg_size; } @@ -267,7 +256,6 @@ class __kernel_compiler static_assert(__kernel_count > 0, "At least one kernel name should be provided"); public: -#if _ONEDPL_KERNEL_BUNDLE_PRESENT template static auto __compile(_Exec&& __exec) @@ -290,18 +278,6 @@ class __kernel_compiler { return __kernel_array_type{__kernel_bundle.get_kernel(__kernel_ids[_Ip])...}; } -#else - template - static auto - __compile(_Exec&& __exec) - { - sycl::program __program(__exec.queue().get_context()); - - using __return_type = std::conditional_t<(__kernel_count > 1), __kernel_array_type, sycl::kernel>; - return __return_type{ - (__program.build_with_kernel_type<_KernelNames>(), __program.get_kernel<_KernelNames>())...}; - } -#endif }; #if _ONEDPL_DEBUG_SYCL diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index 83c44a8a07..bbfa1c1576 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -32,26 +32,19 @@ #if defined(__LIBSYCL_MAJOR_VERSION) && defined(__LIBSYCL_MINOR_VERSION) && defined(__LIBSYCL_PATCH_VERSION) # define _ONEDPL_LIBSYCL_VERSION \ (__LIBSYCL_MAJOR_VERSION * 10000 + __LIBSYCL_MINOR_VERSION * 100 + __LIBSYCL_PATCH_VERSION) +# if _ONEDPL_LIBSYCL_VERSION < 57000 +# error "oneDPL requires Intel(R) oneAPI DPC++/C++ Compiler 2022.2.0 or newer" +# endif #else # define _ONEDPL_LIBSYCL_VERSION 0 #endif #if _ONEDPL_FPGA_DEVICE -# if _ONEDPL_LIBSYCL_VERSION >= 50400 -# include -# else -# include -# endif +# include #endif // Macros to check the new SYCL features -#define _ONEDPL_NO_INIT_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) -#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) +#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) #define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_LIBSYCL_VERSION >= 60200) #define _ONEDPL_SYCL_DEVICE_COPYABLE_SPECIALIZATION_BROKEN \ (_ONEDPL_LIBSYCL_VERSION < 70100) && (_ONEDPL_LIBSYCL_VERSION != 0) @@ -59,8 +52,6 @@ // TODO: determine which compiler configurations provide subgroup load/store #define _ONEDPL_SYCL_SUB_GROUP_LOAD_STORE_PRESENT false -#define _ONEDPL_SYCL_SUB_GROUP_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50700) - // Macro to check if we are compiling for SPIR-V devices. This macro must only be used within // SYCL kernels for determining SPIR-V compilation. Using this macro on the host may lead to incorrect behavior. #ifndef _ONEDPL_DETECT_SPIRV_COMPILATION // Check if overridden for testing @@ -71,11 +62,7 @@ # endif #endif // _ONEDPL_DETECT_SPIRV_COMPILATION -#if _ONEDPL_LIBSYCL_VERSION >= 50300 -# define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) sycl::reqd_sub_group_size(SIZE) -#else -# define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) intel::reqd_sub_group_size(SIZE) -#endif +#define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) sycl::reqd_sub_group_size(SIZE) // This macro is intended to be used for specifying a subgroup size as a SYCL kernel attribute for SPIR-V targets // only. For non-SPIR-V targets, it will be empty. This macro should only be used in device code and may lead @@ -97,32 +84,17 @@ namespace __dpl_sycl { -using __no_init = -#if _ONEDPL_NO_INIT_PRESENT - sycl::property::no_init; -#else - sycl::property::noinit; -#endif +using __no_init = sycl::property::no_init; -#if _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT template using __known_identity = sycl::known_identity<_BinaryOp, _T>; template using __has_known_identity = sycl::has_known_identity<_BinaryOp, _T>; -#elif _ONEDPL_LIBSYCL_VERSION == 50200 -template -using __known_identity = sycl::ONEAPI::known_identity<_BinaryOp, _T>; - -template -using __has_known_identity = sycl::ONEAPI::has_known_identity<_BinaryOp, _T>; -#endif // _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT - template inline constexpr auto __known_identity_v = __known_identity<_BinaryOp, _T>::value; -#if _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT template using __plus = sycl::plus<_T>; @@ -131,50 +103,28 @@ using __maximum = sycl::maximum<_T>; template using __minimum = sycl::minimum<_T>; -#else // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT -template -using __plus = sycl::ONEAPI::plus<_T>; -template -using __maximum = sycl::ONEAPI::maximum<_T>; - -template -using __minimum = sycl::ONEAPI::minimum<_T>; -#endif // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT - -#if _ONEDPL_SYCL_SUB_GROUP_PRESENT using __sub_group = sycl::sub_group; -#else -using __sub_group = sycl::ONEAPI::sub_group; -#endif template constexpr auto __get_buffer_size(const _Buffer& __buffer) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 return __buffer.size(); -#else - return __buffer.get_count(); -#endif } template constexpr auto __get_accessor_size(const _Accessor& __accessor) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 return __accessor.size(); -#else - return __accessor.get_count(); -#endif } template constexpr void __group_barrier(_Item __item) { -#if 0 //_ONEDPL_LIBSYCL_VERSION >= 50300 +#if 0 //TODO: usage of sycl::group_barrier: probably, we have to revise SYCL parallel patterns which use a group_barrier. // 1) sycl::group_barrier() implementation is not ready // 2) sycl::group_barrier and sycl::item::group_barrier are not quite equivalent @@ -188,143 +138,91 @@ template constexpr auto __group_broadcast(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::group_broadcast(__args...); -#else - return sycl::ONEAPI::broadcast(__args...); -#endif } template constexpr auto __exclusive_scan_over_group(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::exclusive_scan_over_group(__args...); -#else - return sycl::ONEAPI::exclusive_scan(__args...); -#endif } template constexpr auto __inclusive_scan_over_group(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::inclusive_scan_over_group(__args...); -#else - return sycl::ONEAPI::inclusive_scan(__args...); -#endif } template constexpr auto __reduce_over_group(_Args... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::reduce_over_group(__args...); -#else - return sycl::ONEAPI::reduce(__args...); -#endif } template constexpr auto __any_of_group(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::any_of_group(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::any_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __all_of_group(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::all_of_group(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::all_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __none_of_group(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::none_of_group(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::none_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_exclusive_scan(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_exclusive_scan(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::exclusive_scan(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_inclusive_scan(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_inclusive_scan(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::inclusive_scan(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_reduce(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_reduce(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::reduce(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_any_of(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_any_of(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::any_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_all_of(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_all_of(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::all_of(::std::forward<_Args>(__args)...); -#endif } template constexpr auto __joint_none_of(_Args&&... __args) { -#if _ONEDPL_SYCL2020_COLLECTIVES_PRESENT return sycl::joint_none_of(::std::forward<_Args>(__args)...); -#else - return sycl::ONEAPI::none_of(::std::forward<_Args>(__args)...); -#endif } #if _ONEDPL_FPGA_DEVICE @@ -338,7 +236,7 @@ inline auto __fpga_selector() return sycl::ext::intel::fpga_selector_v; } -# elif _ONEDPL_LIBSYCL_VERSION >= 50300 +# else inline auto __fpga_emulator_selector() { return sycl::ext::intel::fpga_emulator_selector{}; @@ -347,31 +245,12 @@ inline auto __fpga_selector() { return sycl::ext::intel::fpga_selector{}; } -# else -inline auto __fpga_emulator_selector() -{ - return sycl::INTEL::fpga_emulator_selector{}; -} -inline auto __fpga_selector() -{ - return sycl::INTEL::fpga_selector{}; -} # endif #endif // _ONEDPL_FPGA_DEVICE -using __target = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 - sycl::target; -#else - sycl::access::target; -#endif +using __target = sycl::target; -constexpr __target __target_device = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 - __target::device; -#else - __target::global_buffer; -#endif +constexpr __target __target_device = __target::device; constexpr __target __host_target = #if _ONEDPL_LIBSYCL_VERSION >= 60200 @@ -389,15 +268,7 @@ using __buffer_allocator = #endif template -#if _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT using __atomic_ref = sycl::atomic_ref<_AtomicType, sycl::memory_order::relaxed, sycl::memory_scope::work_group, _Space>; -#else -struct __atomic_ref : sycl::atomic<_AtomicType, _Space> -{ - explicit __atomic_ref(_AtomicType& ref) - : sycl::atomic<_AtomicType, _Space>(sycl::multi_ptr<_AtomicType, _Space>(&ref)){}; -}; -#endif // _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT template using __local_accessor = diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index d180e00fc2..b764f2bc10 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -49,7 +49,6 @@ inline constexpr bool __can_use_known_identity = template using __has_known_identity = ::std::conditional_t< __can_use_known_identity<_Tp>, -# if _ONEDPL_LIBSYCL_VERSION >= 50200 typename ::std::disjunction< __dpl_sycl::__has_known_identity<_BinaryOp, _Tp>, ::std::conjunction<::std::is_arithmetic<_Tp>, @@ -61,15 +60,7 @@ using __has_known_identity = ::std::conditional_t< ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum>>>>, -# else //_ONEDPL_LIBSYCL_VERSION >= 50200 - typename ::std::conjunction< - ::std::is_arithmetic<_Tp>, - ::std::disjunction<::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus<_Tp>>, - ::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus>, - ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<_Tp>>, - ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus>>>, -# endif //_ONEDPL_LIBSYCL_VERSION >= 50200 - ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false + ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false #else //_USE_GROUP_ALGOS && defined(SYCL_IMPLEMENTATION_INTEL) @@ -89,12 +80,7 @@ struct __known_identity_for_plus }; template -inline constexpr _Tp __known_identity = -#if _ONEDPL_LIBSYCL_VERSION >= 50200 - __dpl_sycl::__known_identity<_BinaryOp, _Tp>::value; -#else //_ONEDPL_LIBSYCL_VERSION >= 50200 - __known_identity_for_plus<_BinaryOp, _Tp>::value; //for plus only -#endif //_ONEDPL_LIBSYCL_VERSION >= 50200 +inline constexpr _Tp __known_identity = __dpl_sycl::__known_identity<_BinaryOp, _Tp>::value; template struct walk_n diff --git a/include/oneapi/dpl/pstl/utils.h b/include/oneapi/dpl/pstl/utils.h index 8a8dfdae1b..0901fb46e3 100644 --- a/include/oneapi/dpl/pstl/utils.h +++ b/include/oneapi/dpl/pstl/utils.h @@ -505,7 +505,7 @@ __dpl_bit_cast(const _Src& __src) noexcept { #if __cpp_lib_bit_cast >= 201806L return ::std::bit_cast<_Dst>(__src); -#elif _ONEDPL_BACKEND_SYCL && _ONEDPL_LIBSYCL_VERSION >= 50300 +#elif _ONEDPL_BACKEND_SYCL return sycl::bit_cast<_Dst>(__src); #elif __has_builtin(__builtin_bit_cast) return __builtin_bit_cast(_Dst, __src);