From a0ed3f7c4bc804df3389360b5da66034976d57eb Mon Sep 17 00:00:00 2001 From: Denis Huenich Date: Tue, 27 Mar 2018 16:24:16 +0200 Subject: [PATCH 1/6] added halo namespace; renamed HaloStencilIterator and HaloStencilOperator; changed private member types to pointer, especially for the iterator; added operator- for StencilIterator to work with std::distance; introduced StencilSpecificViews --- .../ex.02.matrix.halo.heat_equation/main.cpp | 17 +- dash/examples/ex.11.halo-stencil/main.cpp | 6 +- dash/include/dash/halo/Halo.h | 116 +++- dash/include/dash/halo/HaloMatrixWrapper.h | 95 ++- ...aloStencilOperator.h => StencilOperator.h} | 180 +++-- .../dash/halo/iterator/StencilIterator.h | 640 ++++++++++++++++++ dash/test/halo/HaloTest.cc | 16 +- 7 files changed, 909 insertions(+), 161 deletions(-) rename dash/include/dash/halo/{HaloStencilOperator.h => StencilOperator.h} (64%) create mode 100644 dash/include/dash/halo/iterator/StencilIterator.h diff --git a/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp b/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp index 2ee816b2c..b91622a16 100644 --- a/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp +++ b/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp @@ -11,10 +11,10 @@ using matrix_t = dash::Matrix< double, 2, typename pattern_t::index_type, pattern_t>; -using StencilT = dash::StencilPoint<2>; -using StencilSpecT = dash::StencilSpec; -using GlobBoundSpecT = dash::GlobalBoundarySpec<2>; -using HaloMatrixWrapperT = dash::HaloMatrixWrapper; +using StencilT = dash::halo::StencilPoint<2>; +using StencilSpecT = dash::halo::StencilSpec; +using GlobBoundSpecT = dash::halo::GlobalBoundarySpec<2>; +using HaloMatrixWrapperT = dash::halo::HaloMatrixWrapper; using array_t = dash::Array; @@ -48,9 +48,6 @@ int main(int argc, char *argv[]) cerr << "Not enough arguments ./ matrix_ext iterations" << endl; return 1; } - using HaloBlockT = dash::HaloBlock; - using HaloMemT = dash::HaloMemory; - auto matrix_ext = 
std::atoi(argv[1]); auto iterations = std::atoi(argv[2]); @@ -87,7 +84,7 @@ int main(int argc, char *argv[]) StencilSpecT stencil_spec( StencilT(-1, 0), StencilT(1, 0), StencilT( 0, -1), StencilT(0, 1)); - GlobBoundSpecT bound_spec(dash::BoundaryProp::CYCLIC, dash::BoundaryProp::CYCLIC); + GlobBoundSpecT bound_spec(dash::halo::BoundaryProp::CYCLIC, dash::halo::BoundaryProp::CYCLIC); HaloMatrixWrapperT halomat(matrix, bound_spec, stencil_spec); HaloMatrixWrapperT halomat2(matrix2, bound_spec, stencil_spec); @@ -95,8 +92,8 @@ int main(int argc, char *argv[]) auto stencil_op = halomat.stencil_operator(stencil_spec); auto stencil_op2 = halomat2.stencil_operator(stencil_spec); - decltype(stencil_op)* current_op = &stencil_op; - decltype(stencil_op2)* new_op = &stencil_op2; + auto* current_op = &stencil_op; + auto* new_op = &stencil_op2; HaloMatrixWrapperT* current_halo = &halomat; HaloMatrixWrapperT* new_halo = &halomat2; diff --git a/dash/examples/ex.11.halo-stencil/main.cpp b/dash/examples/ex.11.halo-stencil/main.cpp index b333048e3..2c8234c98 100644 --- a/dash/examples/ex.11.halo-stencil/main.cpp +++ b/dash/examples/ex.11.halo-stencil/main.cpp @@ -36,9 +36,9 @@ using element_t = unsigned char; using Pattern_t = dash::Pattern<2>; using index_t = typename Pattern_t::index_type; using Array_t = dash::NArray; -using StencilP_t = dash::StencilPoint<2>; -using StencilSpec_t = dash::StencilSpec; -using HaloWrapper_t = dash::HaloMatrixWrapper; +using StencilP_t = dash::halo::StencilPoint<2>; +using StencilSpec_t = dash::halo::StencilSpec; +using HaloWrapper_t = dash::halo::HaloMatrixWrapper; void write_pgm(const std::string & filename, const Array_t & data){ if(dash::myid() == 0){ diff --git a/dash/include/dash/halo/Halo.h b/dash/include/dash/halo/Halo.h index c894081c0..94032143a 100644 --- a/dash/include/dash/halo/Halo.h +++ b/dash/include/dash/halo/Halo.h @@ -11,14 +11,20 @@ namespace dash { +namespace halo { + /** * Stencil point with raletive coordinates for N 
dimensions * e.g. StencilPoint<2>(-1,-1) -> north west */ template class StencilPoint : public Dimensional { +public: + using point_value_t = int16_t; + using coefficient_t = CoeffT; + private: - using Base_t = Dimensional; + using Base_t = Dimensional; public: // TODO constexpr @@ -42,9 +48,9 @@ class StencilPoint : public Dimensional { template constexpr StencilPoint( typename std::enable_if::type value, + point_value_t>::type value, Values... values) - : Base_t::Dimensional(value, (int16_t) values...) {} + : Base_t::Dimensional(value, (point_value_t) values...) {} /** * Constructor @@ -55,8 +61,8 @@ class StencilPoint : public Dimensional { constexpr StencilPoint( typename std::enable_if::type coefficient, - int16_t value, Values... values) - : Base_t::Dimensional(value, (int16_t) values...), _coefficient(coefficient) { + point_value_t value, Values... values) + : Base_t::Dimensional(value, (point_value_t) values...), _coefficient(coefficient) { } // TODO as constexpr @@ -64,9 +70,9 @@ class StencilPoint : public Dimensional { * Returns maximum distance to center over all dimensions */ int max() const { - int16_t max = 0; + int max = 0; for(dim_t i(0); i < NumDimensions; ++i) - max = std::max((int) max, (int) std::abs(this->_values[i])); + max = std::max( max, (int) std::abs(this->_values[i])); return max; } @@ -88,11 +94,16 @@ template class StencilSpec { private: using Self_t = StencilSpec; + static constexpr auto NumDimensions = StencilPointT::ndim(); public: using stencil_size_t = std::size_t; using stencil_index_t = std::size_t; using StencilArray_t = std::array; + using StencilPoint_t = StencilPointT; + using point_value_t = typename StencilPoint_t::point_value_t; + using MaxDistanceDim_t = std::pair; + using MaxDistanceAll_t = std::array; public: /** @@ -134,7 +145,7 @@ class StencilSpec { } /** - * Finds the stencil point index for a given \ref StencilPoint + * Returns the stencil point index for a given \ref StencilPoint * * \return The index and true 
if the given stecil point was found, * else the index 0 and false. @@ -150,6 +161,35 @@ class StencilSpec { return std::make_pair(0, false); } + MaxDistanceAll_t minmax_distances() const { + MaxDistanceAll_t max_dist{}; + for(const auto& stencil_point : _specs) { + for(auto d = 0; d < NumDimensions; ++d) { + if(stencil_point[d] < max_dist[d].first) { + max_dist[d].first = stencil_point[d]; + continue; + } + if(stencil_point[d] > max_dist[d].second) + max_dist[d].second = stencil_point[d]; + } + } + + return max_dist; + } + + MaxDistanceDim_t minmax_distances(dim_t dim) const { + MaxDistanceDim_t max_dist{}; + for(const auto& stencil_point : _specs) { + if(stencil_point[dim] < max_dist.first) { + max_dist.first = stencil_point[dim]; + continue; + } + if(stencil_point[dim] > max_dist.second) + max_dist.second = stencil_point[dim]; + } + + return max_dist; + } /** * \return stencil point for a given index */ @@ -612,12 +652,12 @@ class RegionIter { /** * Constructor, creates a region iterator. */ - RegionIter(GlobMem_t& globmem, const PatternT& pattern, + RegionIter(GlobMem_t* globmem, const PatternT* pattern, const ViewSpec_t& _region_view, pattern_index_t pos, pattern_size_t size) : _globmem(globmem), _pattern(pattern), _region_view(_region_view), _idx(pos), - _max_idx(size - 1), _myid(pattern.team().myid()), - _lbegin(globmem.lbegin()) {} + _max_idx(size - 1), _myid(pattern->team().myid()), + _lbegin(globmem->lbegin()) {} /** * Copy constructor. 
@@ -663,9 +703,9 @@ class RegionIter { */ reference operator[](pattern_index_t n) const { auto coords = glob_coords(_idx + n); - auto local_pos = _pattern.local_index(coords); + auto local_pos = _pattern->local_index(coords); - return reference(_globmem.at(local_pos.unit, local_pos.index)); + return reference(_globmem->at(local_pos.unit, local_pos.index)); } dart_gptr_t dart_gptr() const { return operator[](_idx).dart_gptr(); } @@ -678,7 +718,7 @@ class RegionIter { GlobIter global() const { auto g_idx = gpos(); - return GlobIter(&_globmem, _pattern, g_idx); + return GlobIter(_globmem, &_pattern, g_idx); } ElementT* local() const { @@ -713,7 +753,7 @@ class RegionIter { * \see DashGlobalIteratorConcept */ pattern_index_t gpos() const { - return _pattern.memory_layout().at(glob_coords(_idx)); + return _pattern->memory_layout().at(glob_coords(_idx)); } std::array gcoords() const { @@ -721,7 +761,7 @@ class RegionIter { } typename PatternT::local_index_t lpos() const { - return _pattern.local_index(glob_coords(_idx)); + return _pattern->local_index(glob_coords(_idx)); } const ViewSpec_t viewspec() const { return _region_view; } @@ -734,7 +774,7 @@ class RegionIter { * * \see DashGlobalIteratorConcept */ - const GlobMem_t& globmem() const { return _globmem; } + const GlobMem_t& globmem() const { return *_globmem; } /** * Prefix increment operator. @@ -818,7 +858,7 @@ class RegionIter { return compare(other, std::not_equal_to()); } - const PatternT& pattern() const { return _pattern; } + const PatternT& pattern() const { return *_pattern; } private: /** @@ -844,14 +884,14 @@ class RegionIter { std::array glob_coords( pattern_index_t idx) const { - return _pattern.memory_layout().coords(idx, _region_view); + return _pattern->memory_layout().coords(idx, _region_view); } private: /// Global memory used to dereference iterated values. - GlobMem_t& _globmem; + GlobMem_t* _globmem; /// Pattern that created the encapsulated block. 
- const PatternT& _pattern; + const PatternT* _pattern; const ViewSpec_t _region_view; /// Iterator's position relative to the block border's iteration space. @@ -915,8 +955,8 @@ class Region { std::any_of(border.begin(), border.end(), [](bool border_dim) { return border_dim == true; })), _custom_region(custom_region), - _beg(globmem, pattern, _region, 0, _region.size()), - _end(globmem, pattern, _region, _region.size(), _region.size()) {} + _beg(&globmem, &pattern, _region, 0, _region.size()), + _end(&globmem, &pattern, _region, _region.size(), _region.size()) {} const region_index_t index() const { return _region_spec.index(); } @@ -1072,10 +1112,6 @@ class HaloBlock { } } } - /* - * Setup for the non duplicate boundary elements and the views: inner, - * boundary and inner + boundary - */ auto index = spec.index(); _halo_regions.push_back( Region_t(spec, ViewSpec_t(halo_region_offsets, halo_region_extents), @@ -1089,6 +1125,10 @@ class HaloBlock { _boundary_reg_mapping[index] = &_boundary_regions.back(); } + /* + * Setup for the non duplicate boundary elements and the views: inner, + * boundary and inner + boundary + */ for(auto d = 0; d < NumDimensions; ++d) { const auto view_offset = view.offset(d); const auto view_extent = view.extent(d); @@ -1211,8 +1251,8 @@ class HaloBlock { } /** - * Returns the inner view. Depends on the used \ref HaloSpec when the - * HaloBlock was build. + * Returns the inner view with global offsets depending on the used + * \ref HaloSpec. 
*/ const ViewSpec_t& view_inner() const { return _view_inner; } @@ -1360,14 +1400,14 @@ class HaloMemory { * \return Pointer to the first halo element or nullptr if the * region doesn't exist */ - Element_t* pos_at(region_index_t index) { return _halo_offsets[index]; } + Element_t* first_element_at(region_index_t index) { return _halo_offsets[index]; } /** * Pointer to the first halo element * * \return Pointer to the first halo element */ - Element_t* pos_begin() { return _halobuffer.data(); } + Element_t* first_element() { return _halobuffer.data(); } /** * Container storing all halo elements @@ -1382,7 +1422,7 @@ class HaloMemory { * false if not. */ bool to_halo_mem_coords_check(const region_index_t region_index, - ElementCoords_t& coords) { + ElementCoords_t& coords) const { const auto& extents = _haloblock.halo_region(region_index)->region().extents(); for(auto d = 0; d < NumDimensions; ++d) { @@ -1402,7 +1442,7 @@ class HaloMemory { * Converts coordinates to halo memory coordinates for a given region index. */ void to_halo_mem_coords(const region_index_t region_index, - ElementCoords_t& coords) { + ElementCoords_t& coords) const { const auto& extents = _haloblock.halo_region(region_index)->region().extents(); for(auto d = 0; d < NumDimensions; ++d) { @@ -1421,18 +1461,20 @@ class HaloMemory { * region. 
*/ pattern_size_t offset(const region_index_t region_index, - const ElementCoords_t& coords) { + const ElementCoords_t& coords) const { const auto& extents = _haloblock.halo_region(region_index)->region().extents(); pattern_size_t off = 0; if(MemoryArrange == ROW_MAJOR) { off = coords[0]; - for(auto d = 1; d < NumDimensions; ++d) + for(dim_t d = 1; d < NumDimensions; ++d) off = off * extents[d] + coords[d]; } else { off = coords[NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) + for(dim_t d = NumDimensions - 1; d > 0; ) { + --d; off = off * extents[d] + coords[d]; + } } return off; @@ -1444,6 +1486,8 @@ class HaloMemory { std::array _halo_offsets{}; }; // class HaloMemory +} // namespace halo + } // namespace dash #endif // DASH__HALO_HALO_H__ diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index 254cb3ede..23bb9bad6 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -13,6 +13,8 @@ namespace dash { +namespace halo { + /** * As known from classic stencil algorithms, *boundaries* are the outermost * elements within a block that are requested by neighoring units. @@ -21,7 +23,7 @@ namespace dash { * * The \c HaloMatrixWrapper acts as a wrapper of the local blocks of the NArray * and extends these by boundary and halo regions. The HaloMatrixWrapper also - * provides a function to create a \ref HaloStencilOperator. + * provides a function to create a \ref StencilOperator. * * Example for an outer block boundary iteration space (halo regions): * @@ -80,7 +82,7 @@ class HaloMatrixWrapper { const StencilSpecT&... 
stencil_spec) : _matrix(matrix), _cycle_spec(cycle_spec), _halo_spec(stencil_spec...), _view_local(matrix.local.extents()), - _view_global(ViewSpec_t(matrix.local.offsets(), matrix.local.extents())), + _view_global(matrix.local.offsets(), matrix.local.extents()), _haloblock(matrix.begin().globmem(), matrix.pattern(), _view_global, _halo_spec, cycle_spec), _halomemory(_haloblock) { @@ -92,7 +94,7 @@ class HaloMatrixWrapper { pattern_size_t num_elems_block = 1; auto rel_dim = region.spec().relevant_dim(); auto level = region.spec().level(); - auto* off = _halomemory.pos_at(region.index()); + auto* off = _halomemory.first_element_at(region.index()); auto it = region.begin(); if(MemoryArrange == ROW_MAJOR) { @@ -108,7 +110,6 @@ class HaloMatrixWrapper { (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) : 1; auto ds_stride = dart_storage(stride); - HaloData halo_data; dart_datatype_t stride_type; dart_type_create_strided(ds_num_elems_block.dtype, ds_stride.nelem, ds_num_elems_block.nelem, &stride_type); @@ -117,13 +118,13 @@ class HaloMatrixWrapper { _region_data.insert(std::make_pair( region.index(), Data{ region, [off, it, region_size, ds_num_elems_block, - stride_type](HaloData& data) { + stride_type](dart_handle_t& handle) { dart_get_handle(off, it.dart_gptr(), region_size, stride_type, ds_num_elems_block.dtype, - &data.handle); + &handle); }, - std::move(halo_data) })); + DART_HANDLE_NULL })); } // TODO more optimizations @@ -133,7 +134,6 @@ class HaloMatrixWrapper { auto ds_num_elems_block = dart_storage(num_elems_block); num_blocks = region_size / num_elems_block; auto it_tmp = it; - HaloData halo_data; auto start_index = it.lpos().index; std::vector block_sizes(num_blocks); std::vector block_offsets(num_blocks); @@ -155,13 +155,13 @@ class HaloMatrixWrapper { _region_data.insert(std::make_pair( region.index(), Data{ region, [off, it, ds_num_elems_block, region_size, - index_type](HaloData& data) { + index_type](dart_handle_t& handle) { 
dart_get_handle(off, it.dart_gptr(), region_size, index_type, ds_num_elems_block.dtype, - &data.handle); + &handle); }, - std::move(halo_data) })); + DART_HANDLE_NULL })); } } else { if(level == 1) { //|| (level == 2 && @@ -177,7 +177,6 @@ class HaloMatrixWrapper { (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) : 1; auto ds_stride = dart_storage(stride); - HaloData halo_data; dart_datatype_t stride_type; dart_type_create_strided(ds_num_elems_block.dtype, ds_stride.nelem, @@ -187,13 +186,13 @@ class HaloMatrixWrapper { _region_data.insert(std::make_pair( region.index(), Data{ region, [off, it, region_size, ds_num_elems_block, - stride_type](HaloData& data) { + stride_type](dart_handle_t& handle) { dart_get_handle(off, it.dart_gptr(), region_size, stride_type, ds_num_elems_block.dtype, - &data.handle); + &handle); }, - std::move(halo_data) })); + DART_HANDLE_NULL })); } // TODO more optimizations else { @@ -202,7 +201,6 @@ class HaloMatrixWrapper { auto ds_num_elems_block = dart_storage(num_elems_block); num_blocks = region_size / num_elems_block; auto it_tmp = it; - HaloData halo_data; std::vector block_sizes(num_blocks); std::vector block_offsets(num_blocks); std::fill(block_sizes.begin(), block_sizes.end(), @@ -226,13 +224,13 @@ class HaloMatrixWrapper { _region_data.insert(std::make_pair( region.index(), Data{ region, [off, it, index_type, region_size, - ds_num_elems_block](HaloData& data) { + ds_num_elems_block](dart_handle_t& handle) { dart_get_handle(off, it.dart_gptr(), region_size, index_type, ds_num_elems_block.dtype, - &data.handle); + &handle); }, - std::move(halo_data) })); + DART_HANDLE_NULL })); } num_elems_block = region.region().extent(0); @@ -267,8 +265,10 @@ class HaloMatrixWrapper { * Initiates a blocking halo region update for all halo elements. 
*/ void update() { - for(auto& region : _region_data) - update_halo_intern(region.second, false); + for(auto& region : _region_data) { + update_halo_intern(region.second); + dart_wait_local(®ion.second.handle); + } } /** @@ -277,8 +277,10 @@ class HaloMatrixWrapper { */ void update_at(region_index_t index) { auto it_find = _region_data.find(index); - if(it_find != _region_data.end()) - update_halo_intern(it_find->second, false); + if(it_find != _region_data.end()) { + update_halo_intern(it_find->second); + dart_wait_local(&it_find->second.handle); + } } /** @@ -286,7 +288,7 @@ class HaloMatrixWrapper { */ void update_async() { for(auto& region : _region_data) - update_halo_intern(region.second, true); + update_halo_intern(region.second); } /** @@ -296,7 +298,7 @@ class HaloMatrixWrapper { void update_async_at(region_index_t index) { auto it_find = _region_data.find(index); if(it_find != _region_data.end()) - update_halo_intern(it_find->second, true); + update_halo_intern(it_find->second); } /** @@ -305,7 +307,7 @@ class HaloMatrixWrapper { */ void wait() { for(auto& region : _region_data) - dart_wait_local(®ion.second.halo_data.handle); + dart_wait_local(®ion.second.handle); } /** @@ -315,7 +317,7 @@ class HaloMatrixWrapper { void wait(region_index_t index) { auto it_find = _region_data.find(index); if(it_find != _region_data.end()) - dart_wait_local(it_find->second.halo_data.handle); + dart_wait_local(it_find->second.handle); } /** @@ -342,8 +344,8 @@ class HaloMatrixWrapper { /** * Returns the underlying NArray */ - const MatrixT& matrix() const { return _matrix; } + /** * Sets all global border halo elements. 
set_custom_halos calls FuntionT with * all global coordinates of type: @@ -371,7 +373,7 @@ class HaloMatrixWrapper { using signed_extent_t = typename std::make_signed::type; for(const auto& region : _haloblock.boundary_regions()) { if(region.is_custom_region()) { - auto* pos_ptr = _halomemory.pos_at(region.index()); + auto* pos_ptr = _halomemory.first_element_at(region.index()); const auto& spec = region.spec(); std::array coords_offset{}; const auto& reg_ext = region.region().extents(); @@ -407,14 +409,14 @@ class HaloMatrixWrapper { } auto index = _haloblock.index_at(_view_local, coords); const auto& spec = _halo_spec.spec(index); - auto halomem_pos = _halomemory.pos_at(index); + auto* halomem_pos = _halomemory.first_element_at(index); if(spec.level() == 0 || halomem_pos == nullptr) return nullptr; if(!_halomemory.to_halo_mem_coords_check(index, coords)) return nullptr; - return _halomemory.pos_at(index) + _halomemory.offset(index, coords); + return halomem_pos + _halomemory.offset(index, coords); } /** @@ -424,22 +426,22 @@ class HaloMatrixWrapper { Element_t* halo_element_at_local(ElementCoords_t coords) { auto index = _haloblock.index_at(_view_local, coords); const auto& spec = _halo_spec.spec(index); - auto halomem_pos = _halomemory.pos_at(index); + auto* halomem_pos = _halomemory.first_element_at(index); if(spec.level() == 0 || halomem_pos == nullptr) return nullptr; if(!_halomemory.to_halo_mem_coords_check(index, coords)) return nullptr; - return _halomemory.pos_at(index) + _halomemory.offset(index, coords); + return halomem_pos + _halomemory.offset(index, coords); } /** - * Crates \ref HaloStencilOperator for a given \ref StencilSpec. + * Crates \ref StencilOperator for a given \ref StencilSpec. * Asserts whether the StencilSpec fits in the provided halo regions. 
*/ template - HaloStencilOperator stencil_operator( + StencilOperator stencil_operator( const StencilSpecT& stencil_spec) { for(const auto& stencil : stencil_spec.specs()) { DASH_ASSERT_MSG(stencil.max() @@ -447,29 +449,22 @@ class HaloMatrixWrapper { "Stencil point extent higher than halo region extent."); } - return HaloStencilOperator( - _haloblock, _halomemory, stencil_spec, _view_local); + return StencilOperator( + &_haloblock, &_halomemory, stencil_spec, &_view_local); } private: - struct HaloData { - dart_handle_t handle = DART_HANDLE_NULL; - }; - struct Data { const Region_t& region; - std::function get_halos; - HaloData halo_data; + std::function get_halos; + dart_handle_t handle = DART_HANDLE_NULL; }; - void update_halo_intern(Data& data, bool async) { + void update_halo_intern(Data& data) { if(data.region.is_custom_region()) return; - data.get_halos(data.halo_data); - - if(!async) - dart_wait_local(&data.halo_data.handle); + data.get_halos(data.handle); } private: @@ -484,6 +479,8 @@ class HaloMatrixWrapper { std::vector _dart_types; }; +} // namespace halo + } // namespace dash #endif // DASH__HALO_HALOMATRIXWRAPPER_H diff --git a/dash/include/dash/halo/HaloStencilOperator.h b/dash/include/dash/halo/StencilOperator.h similarity index 64% rename from dash/include/dash/halo/HaloStencilOperator.h rename to dash/include/dash/halo/StencilOperator.h index b43111ef3..274a03587 100644 --- a/dash/include/dash/halo/HaloStencilOperator.h +++ b/dash/include/dash/halo/StencilOperator.h @@ -1,14 +1,17 @@ #ifndef DASH__HALO_HALOSTENCILOPERATOR_H #define DASH__HALO_HALOSTENCILOPERATOR_H -#include +#include namespace dash { + +namespace halo { + /** - * The HAloStencilOperator provides stencil specific iterator and functions for + * The StencilOperator provides stencil specific iterator and functions for * a given \ref HaloBlock and HaloMemory. 
* - * Provided \ref HaloStencilIterator are for the inner block, the boundary + * Provided \ref StencilIterator are for the inner block, the boundary * elements and both. The inner block iterator ensures that no stencil point * accesses halo elements or not existing elements. The stencil points of the * boundary iterator point at least to one halo element. @@ -37,7 +40,7 @@ namespace dash { * */ template -class HaloStencilOperator { +class StencilOperator { private: static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); static constexpr auto NumDimensions = PatternT::ndim(); @@ -48,13 +51,13 @@ class HaloStencilOperator { using pattern_index_t = typename PatternT::index_type; public: - using iterator = HaloStencilIterator; using const_iterator = const iterator; - using iterator_inner = HaloStencilIterator; using const_iterator_inner = const iterator_inner; - using iterator_bnd = HaloStencilIterator; using const_iterator_bnd = const iterator_bnd; @@ -64,29 +67,32 @@ class HaloStencilOperator { using ViewSpec_t = ViewSpec; using ElementCoords_t = std::array; + using StencilSpecViews_t = StencilSpecificViews; + public: /** * Constructor that takes a \ref HaloBlock, a \ref HaloMemory, * a \ref StencilSpec and a local \ref ViewSpec */ - HaloStencilOperator(const HaloBlock_t& haloblock, HaloMemory_t& halomemory, + StencilOperator(const HaloBlock_t* haloblock, HaloMemory_t* halomemory, const StencilSpecT& stencil_spec, - const ViewSpec_t& view_local) + const ViewSpec_t* view_local) : _halo_block(haloblock), _halo_memory(halomemory), _stencil_spec(stencil_spec), _view_local(view_local), _stencil_offsets(set_stencil_offsets()), - _local_memory((ElementT*) _halo_block.globmem().lbegin()), - _begin(_halo_block, _halo_memory, _stencil_spec, _stencil_offsets, 0), - _end(_halo_block, _halo_memory, _stencil_spec, _stencil_offsets, - _halo_block.view_inner_with_boundaries().size()), - _ibegin(_halo_block, _halo_memory, _stencil_spec, _stencil_offsets, 0), - 
_iend(_halo_block, _halo_memory, _stencil_spec, _stencil_offsets, - _halo_block.view_inner().size()), - _bbegin(_halo_block, _halo_memory, _stencil_spec, _stencil_offsets, 0), - _bend(_halo_block, _halo_memory, _stencil_spec, _stencil_offsets, - _halo_block.boundary_size()) {} - - HaloStencilOperator() = delete; + _local_memory((ElementT*) _halo_block->globmem().lbegin()), + _spec_views(*_halo_block, _stencil_spec, _view_local), + _begin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, 0), + _end(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, + _spec_views.inner_with_boundaries().size()), + _ibegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, 0), + _iend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, + _spec_views.inner().size()), + _bbegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, 0), + _bend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, + _spec_views.boundary_size()) {} + + StencilOperator() = delete; /// returns the begin iterator for all relevant elements (inner + boundary) iterator begin() noexcept { return _begin; } @@ -129,7 +135,7 @@ class HaloStencilOperator { /** * Returns the \ref HaloBlock */ - const HaloBlock_t& halo_block() { return _halo_block; } + const HaloBlock_t& halo_block() { return *_halo_block; } /** * Returns the stencil specification \ref StencilSpec @@ -139,7 +145,21 @@ class HaloStencilOperator { /** * Returns the halo memory management object \ref HaloMemory */ - HaloMemory_t& halo_memory() { return _halo_memory; } + HaloMemory_t& halo_memory() { return *_halo_memory; } + + const StencilSpecViews_t& spec_views() const { + return _spec_views; + } + + const ViewSpec_t& view_inner() const { + return _spec_views.inner(); + } + + const ViewSpec_t& view_inner_with_boundaries() const { + return _spec_views.inner_with_boundaries(); + } + + /** * Modifies 
all stencil point elements and the center within the inner view. @@ -157,19 +177,8 @@ class HaloStencilOperator { const ElementCoords_t& coords, ElementT value, ElementT coefficient_center, std::function op = [](const ElementT& lhs, const ElementT& rhs) { return rhs; }) { - auto* center = _local_memory; - pattern_index_t offset = 0; + auto* center = _local_memory + get_offset(coords); - if(MemoryArrange == ROW_MAJOR) { - offset = coords[0]; - for(auto d = 1; d < NumDimensions; ++d) - offset = offset * _view_local.extent(d) + coords[d]; - } else { - offset = coords[NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) - offset = offset * _view_local.extent(d) + coords[d]; - } - center += offset; *center = op(*center, coefficient_center * value); for(auto i = 0; i < NumStencilPoints; ++i) { auto& stencil_point_value = center[_stencil_offsets[i]]; @@ -196,37 +205,75 @@ class HaloStencilOperator { const ElementCoords_t& coords, ElementT value, ElementT coefficient_center, std::function op = [](const ElementT& lhs, const ElementT& rhs) { return rhs; }) { - auto* center = _local_memory; - pattern_index_t offset = 0; + auto* center = _local_memory + get_offset(coords); - if(MemoryArrange == ROW_MAJOR) { - offset = coords[0]; - for(auto d = 1; d < NumDimensions; ++d) - offset = offset * _view_local.extent(d) + coords[d]; - } else { - offset = coords[NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) - offset = offset * _view_local.extent(d) + coords[d]; - } - center += offset; *center = op(*center, coefficient_center * value); for(auto i = 0; i < NumStencilPoints; ++i) { bool halo = false; for(auto d = 0; d < NumDimensions; ++d) { auto coord_value = coords[d] + _stencil_spec[i][d]; - if(coord_value < 0 || coord_value >= _view_local.extent(d)) { + if(coord_value < 0 || coord_value >= _view_local->extent(d)) { halo = true; break; } } + if(halo) continue; + auto& stencil_point_value = center[_stencil_offsets[i]]; stencil_point_value = 
op(stencil_point_value, _stencil_spec[i].coefficient() * value); } } + ElementT get_value_at_inner_local( + const ElementCoords_t& coords, ElementT coefficient_center, + std::function op = + std::plus()) { + auto* center = _local_memory + get_offset(coords); + ElementT value = op(0, *center * coefficient_center); + + for(auto i = 0; i < NumStencilPoints; ++i) { + auto& stencil_point_value = center[_stencil_offsets[i]]; + value = op(value, _stencil_spec[i].coefficient() * stencil_point_value); + } + + return value; + } + + ElementT get_value_at_boundary_local( + const ElementCoords_t& coords, ElementT coefficient_center, + std::function op = + std::plus()) { + auto* center = _local_memory + get_offset(coords); + ElementT value = op(0, *center * coefficient_center); + + for(auto i = 0; i < NumStencilPoints; ++i) { + bool halo = false; + auto coords_stencil = coords; + for(auto d = 0; d < NumDimensions; ++d) { + coords_stencil[d] += _stencil_spec[i][d]; + if(coords_stencil[d] < 0 || coords_stencil[d] >= _view_local->extent(d)) + halo = true; + } + + if(halo) { + auto index = _halo_block->index_at(*_view_local, coords_stencil); + auto* halomem_pos = _halo_memory->first_element_at(index); + + _halo_memory->to_halo_mem_coords(index, coords_stencil); + + value = op(value, *( halomem_pos + _halo_memory->offset(index, coords_stencil))); + } else { + auto& stencil_point_value = center[_stencil_offsets[i]]; + value = op(value, _stencil_spec[i].coefficient() * stencil_point_value); + } + } + + return value; + } + private: StencilOffsets_t set_stencil_offsets() { StencilOffsets_t stencil_offs; @@ -235,11 +282,13 @@ class HaloStencilOperator { if(MemoryArrange == ROW_MAJOR) { offset = _stencil_spec[i][0]; for(auto d = 1; d < NumDimensions; ++d) - offset = _stencil_spec[i][d] + offset * _view_local.extent(d); + offset = _stencil_spec[i][d] + offset * _view_local->extent(d); } else { offset = _stencil_spec[i][NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) - 
offset = _stencil_spec[i][d] + offset * _view_local.extent(d); + for(auto d = NumDimensions - 1; d > 0;) { + --d; + offset = _stencil_spec[i][d] + offset * _view_local->extent(d); + } } stencil_offs[i] = offset; } @@ -247,13 +296,32 @@ class HaloStencilOperator { return stencil_offs; } + pattern_index_t get_offset(const ElementCoords_t& coords) { + pattern_index_t offset = 0; + + if(MemoryArrange == ROW_MAJOR) { + offset = coords[0]; + for(auto d = 1; d < NumDimensions; ++d) + offset = offset * _view_local->extent(d) + coords[d]; + } else { + offset = coords[NumDimensions - 1]; + for(auto d = NumDimensions - 1; d > 0;) { + --d; + offset = offset * _view_local->extent(d) + coords[d]; + } + } + + return offset; + } + private: - const HaloBlock_t& _halo_block; - HaloMemory_t& _halo_memory; - const StencilSpecT& _stencil_spec; - const ViewSpec_t& _view_local; + const HaloBlock_t* _halo_block; + HaloMemory_t* _halo_memory; + const StencilSpecT _stencil_spec; + const ViewSpec_t* _view_local; StencilOffsets_t _stencil_offsets; ElementT* _local_memory; + StencilSpecViews_t _spec_views; iterator _begin; iterator _end; @@ -263,5 +331,7 @@ class HaloStencilOperator { iterator_bnd _bend; }; +} // namespace halo + } // namespace dash #endif // DASH__HALO_HALOSTENCILOPERATOR_H diff --git a/dash/include/dash/halo/iterator/StencilIterator.h b/dash/include/dash/halo/iterator/StencilIterator.h new file mode 100644 index 000000000..ca4ac2f62 --- /dev/null +++ b/dash/include/dash/halo/iterator/StencilIterator.h @@ -0,0 +1,640 @@ +#ifndef DASH__HALO__ITERATOR__STENCILITERATOR_H +#define DASH__HALO__ITERATOR__STENCILITERATOR_H + +#include + +#include + +#include + +namespace dash { + +namespace halo { + +enum class StencilViewScope : std::uint8_t { INNER, BOUNDARY, ALL }; + +template +class StencilSpecificViews { +private: + static constexpr auto NumDimensions = HaloBlockT::ndim(); + + using Pattern_t = typename HaloBlockT::Pattern_t; +public: + using ViewSpec_t = typename 
HaloBlockT::ViewSpec_t; + using pattern_size_t = typename Pattern_t::size_type; + +public: + StencilSpecificViews(const HaloBlockT& haloblock, + const StencilSpecT& stencil_spec, + const ViewSpec_t* view_local) + : _view_local(view_local) { + auto& off_glob = haloblock.view().offsets(); + auto minmax_dist = stencil_spec.minmax_distances(); + for(auto& dist : minmax_dist) + dist.first = std::abs(dist.first); + + auto inner_off = haloblock.view_inner().offsets(); + auto inner_ext = haloblock.view_inner().extents(); + auto inner_bound_off = haloblock.view_inner_with_boundaries().offsets(); + auto inner_bound_ext = haloblock.view_inner_with_boundaries().extents(); + for(auto d = 0; d < NumDimensions; ++d) { + inner_off[d] -= off_glob[d]; + inner_bound_off[d] -= off_glob[d]; + resize_offset(inner_off[d], inner_ext[d], minmax_dist[d].first); + resize_extent(inner_off[d], inner_ext[d],_view_local->extent(d), minmax_dist[d].second); + resize_offset(inner_bound_off[d], inner_bound_ext[d], minmax_dist[d].first); + resize_extent(inner_bound_off[d], inner_bound_ext[d],_view_local->extent(d), minmax_dist[d].second); + } + _view_inner = ViewSpec_t(inner_off, inner_ext); + _view_inner_with_boundaries = ViewSpec_t(inner_bound_off, inner_bound_ext); + + const auto& bnd_elems = haloblock.boundary_elements(); + const auto& halo_ext_max = haloblock.halo_extension_max(); + _boundary_elements.reserve(bnd_elems.size()); + for(auto& view : bnd_elems) { + + auto view_off = view.offsets(); + auto view_ext = view.extents(); + for(auto d = 0; d < NumDimensions; ++d) { + view_off[d] -= off_glob[d]; + if(view_off[d] < halo_ext_max[d].first && view_ext[d] == halo_ext_max[d].first) { + view_ext[d] = minmax_dist[d].first; + continue; + } + else if(view_ext[d] == halo_ext_max[d].second) { + view_ext[d] = minmax_dist[d].second; + view_off[d] += halo_ext_max[d].second - minmax_dist[d].second; + } + else { + resize_offset(view_off[d], view_ext[d], minmax_dist[d].first); + resize_extent(view_off[d], 
view_ext[d], _view_local->extent(d), minmax_dist[d].second); + } + } + ViewSpec_t tmp(view_off, view_ext); + + _size_bnd_elems += tmp.size(); + _boundary_elements.push_back(std::move(tmp)); + } + } + + const ViewSpec_t& local() const { return *_view_local; } + + const ViewSpec_t& inner() const { return _view_inner; } + + const ViewSpec_t& inner_with_boundaries() const { + return _view_inner_with_boundaries; + } + + const std::vector& boundary_elements() const { + return _boundary_elements; + } + + pattern_size_t boundary_size() const { return _size_bnd_elems;} + +private: + template + void resize_offset(OffT& offset, ExtT& extent, MaxT max) { + if(offset > max) { + extent += offset - max; + offset = max; + } + } + + template + void resize_extent(OffT& offset, ExtT& extent, ExtT extent_local, MinT max) { + auto diff_ext = extent_local - offset - extent; + if(diff_ext > max) + extent += diff_ext - max; + } + +private: + const ViewSpec_t* _view_local; + ViewSpec_t _view_inner; + ViewSpec_t _view_inner_with_boundaries; + std::vector _boundary_elements; + pattern_size_t _size_bnd_elems = 0; +}; + + +/* + * Iterator with stencil points and halo access \see HaloStencilOperator. + */ +template +class StencilIterator { +private: + static constexpr auto NumDimensions = PatternT::ndim(); + static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); + static constexpr auto MemoryArrange = PatternT::memory_order(); + static constexpr auto FastestDimension = + MemoryArrange == ROW_MAJOR ? 
NumDimensions - 1 : 0; + + using Self_t = StencilIterator; + using ViewSpec_t = typename PatternT::viewspec_type; + using pattern_size_t = typename PatternT::size_type; + using signed_pattern_size_t = typename std::make_signed::type; + using RegionCoords_t = RegionCoords; + +public: + // Iterator traits + using iterator_category = std::random_access_iterator_tag; + using value_type = ElementT; + using difference_type = typename PatternT::index_type; + using pointer = ElementT*; + using reference = ElementT&; + + using HaloBlock_t = HaloBlock; + using HaloMemory_t = HaloMemory; + using pattern_index_t = typename PatternT::index_type; + using region_index_t = typename RegionCoords_t::region_index_t; + using LocalLayout_t = + CartesianIndexSpace; + using StencilP_t = StencilPoint; + using ElementCoords_t = std::array; + using StencilOffsets_t = std::array; + using StencilSpecViews_t = StencilSpecificViews; + +public: + /** + * Constructor + * + * \param local_memory Pointer to the begining of the local NArray memory + * \param halomemory \ref HaloMemory instance for loacl halo memory + * \param stencil_spec \ref StencilSpec to use + * \param stencil_offsets stencil offsets for every stencil point + * \param spec_views stencil specific views \ref StencilSpecViews + * \param idx position of the iterator + */ + StencilIterator(ElementT* local_memory, HaloMemory_t* halomemory, + const StencilSpecT* stencil_spec, + const StencilOffsets_t* stencil_offsets, + const StencilSpecViews_t* spec_views, + pattern_index_t idx) + : _halomemory(halomemory), _stencil_spec(stencil_spec), + _stencil_offsets(stencil_offsets), _spec_views(spec_views), + _local_memory(local_memory), _idx(idx), + _local_layout(spec_views->local().extents()) { + if(Scope == StencilViewScope::INNER) + _view_local = ViewSpec_t(spec_views->inner()); + else if(Scope == StencilViewScope::ALL) + _view_local = ViewSpec_t(spec_views->inner_with_boundaries()); + else + _view_local = ViewSpec_t(spec_views->local()); + 
+ pattern_index_t _size = 0; + if(Scope == StencilViewScope::BOUNDARY) + _size = spec_views->boundary_size(); + else + _size = _view_local.size(); + + if(_idx < _size) + set_coords(); + + const auto ext_max = stencil_spec->minmax_distances(FastestDimension); + if(Scope == StencilViewScope::INNER) { + _ext_dim_reduced = std::make_pair( + _view_local.offset(FastestDimension), + _local_layout.extent(FastestDimension) - ext_max.second - 1); + } else { + _ext_dim_reduced = + std::make_pair(std::abs(ext_max.first), _view_local.extent(FastestDimension) + - ext_max.second - 1); + } + } + + /** + * Copy constructor. + */ + StencilIterator(const Self_t& other) = default; + + /** + * Assignment operator. + * + * \see DashGlobalIteratorConcept + */ + Self_t& operator=(const Self_t& other) = default; + + /** + * The number of dimensions of the iterator's underlying pattern. + * + * \see DashGlobalIteratorConcept + */ + static constexpr dim_t ndim() { return NumDimensions; } + + /** + * Dereference operator. + * + * \return A global reference to the element at the iterator's position. + */ + reference operator*() const { return *_current_lmemory_addr; } + + /** + * Subscript operator, returns global reference to element at given + * global index. 
+ * + * \see DashGlobalIteratorConcept + */ + reference operator[](pattern_index_t n) const { + auto coords = set_coords(_idx + n); + return _local_memory[_local_layout.at(coords)]; + } + + pattern_index_t rpos() const { return _idx; } + + pattern_index_t lpos() const { return _offset; } + + const ElementCoords_t& coords() const { return _coords; }; + + bool is_halo_value(const region_index_t index_stencil) { + if(Scope == StencilViewScope::INNER) + return false; + + auto halo_coords = _coords; + const auto& stencil = (*_stencil_spec)[index_stencil]; + for(auto d = 0; d < NumDimensions; ++d) { + halo_coords[d] += stencil[d]; + if(halo_coords[d] < 0 || halo_coords[d] >= _spec_views->local().extent(d)) + return true; + } + + return false; + } + + /** + * Returns the value for a given stencil point index (index postion in + * \ref StencilSpec) + */ + ElementT value_at(const region_index_t index_stencil) { + return *(_stencil_mem_ptr[index_stencil]); + } + + /* returns the value of a given stencil point (not as efficient as + * stencil point index ) + */ + ElementT value_at(const StencilP_t& stencil) { + auto index_stencil = _stencil_spec->index(stencil); + + DASH_ASSERT_MSG(index_stencil.second, + "No valid region index for given stencil point found"); + + return value_at(index_stencil.first); + } + + /** + * Prefix increment operator. + */ + Self_t& operator++() { + ++_idx; + next_element(); + + return *this; + } + + /** + * Postfix increment operator. + */ + Self_t operator++(int) { + Self_t result = *this; + ++_idx; + next_element(); + + return result; + } + + /** + * Prefix decrement operator. + */ + Self_t& operator--() { + --_idx; + set_coords(); + + return *this; + } + + /** + * Postfix decrement operator. 
+ */ + Self_t operator--(int) { + Self_t result = *this; + --_idx; + set_coords(); + + return result; + } + + Self_t& operator+=(pattern_index_t n) { + _idx += n; + set_coords(); + + return *this; + } + + Self_t& operator-=(pattern_index_t n) { + _idx -= n; + set_coords(); + + return *this; + } + + Self_t operator+(pattern_index_t n) const { + Self_t res{ *this }; + res += n; + + return res; + } + + Self_t operator-(pattern_index_t n) const { + Self_t res{ *this }; + res -= n; + + return res; + } + + difference_type operator-(Self_t& other) const { + return _idx - other._idx; + } + + bool operator<(const Self_t& other) const { + return compare(other, std::less()); + } + + bool operator<=(const Self_t& other) const { + return compare(other, std::less_equal()); + } + + bool operator>(const Self_t& other) const { + return compare(other, std::greater()); + } + + bool operator>=(const Self_t& other) const { + return compare(other, std::greater_equal()); + } + + bool operator==(const Self_t& other) const { + return compare(other, std::equal_to()); + } + + bool operator!=(const Self_t& other) const { + return compare(other, std::not_equal_to()); + } + +private: + /** + * Compare position of this global iterator to the position of another + * global iterator with respect to viewspec projection. 
+ */ + template + bool compare(const Self_t& other, const GlobIndexCmpFunc& gidx_cmp) const { +#if __REMARK__ + // Usually this is a best practice check, but it's an infrequent case + // so we rather avoid this comparison: + if(this == &other) { + return true; + } +#endif + if(&_view_local == &(other._view_local) + || _view_local == other._view_local) { + return gidx_cmp(_idx, other._idx); + } + // TODO not the best solution + return false; + } + + /*void set_view_local(const ViewSpec_t& view_tmp) { + if(Scope == StencilViewScope::BOUNDARY) { + const auto& bnd_elems = _haloblock.boundary_elements(); + _bnd_elements.reserve(bnd_elems.size()); + const auto& view_offs = view_tmp.offsets(); + for(const auto& region : bnd_elems) { + auto off = region.offsets(); + for(int d = 0; d < NumDimensions; ++d) + off[d] -= view_offs[d]; + + _bnd_elements.push_back(ViewSpec_t(off, region.extents())); + } + + _view_local = ViewSpec_t(view_tmp.extents()); + } else { + const auto& view_offsets = _haloblock.view().offsets(); + auto off = view_tmp.offsets(); + for(int d = 0; d < NumDimensions; ++d) + off[d] -= view_offsets[d]; + + _view_local = ViewSpec_t(off, view_tmp.extents()); + } + }*/ + + void next_element() { + const auto& coord_fastest_dim = _coords[FastestDimension]; + + if(coord_fastest_dim >= _ext_dim_reduced.first + && coord_fastest_dim < _ext_dim_reduced.second) { + for(auto it = _stencil_mem_ptr.begin(); it != _stencil_mem_ptr.end(); + ++it) + *it += 1; + + ++_coords[FastestDimension]; + ++_current_lmemory_addr; + ++_offset; + + return; + } + + if(Scope == StencilViewScope::INNER) { + if(MemoryArrange == ROW_MAJOR) { + for(dim_t d = NumDimensions; d > 0;) { + --d; + if(_coords[d] < _view_local.extent(d) + _view_local.offset(d) - 1) { + ++_coords[d]; + break; + } else + _coords[d] = _view_local.offset(d); + } + } else { + for(dim_t d = 0; d < NumDimensions; ++d) { + if(_coords[d] < _view_local.extent(d) + _view_local.offset(d) - 1) { + ++_coords[d]; + break; + } else + 
_coords[d] = _view_local.offset(d); + } + } + if(MemoryArrange == ROW_MAJOR) { + _offset = _coords[0]; + for(dim_t d = 1; d < NumDimensions; ++d) + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } else { + _offset = _coords[NumDimensions - 1]; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } + } + _current_lmemory_addr = _local_memory + _offset; + for(auto i = 0; i < NumStencilPoints; ++i) + _stencil_mem_ptr[i] = _current_lmemory_addr + (*_stencil_offsets)[i]; + } else + set_coords(); + } + + void set_coords() { + if(Scope == StencilViewScope::BOUNDARY) { + if(_region_bound == 0) { + _coords = set_coords(_idx); + } else { + const auto& bnd_elements = _spec_views->boundary_elements(); + if(_idx < _region_bound) { + const auto& region = bnd_elements[_region_number]; + + if(MemoryArrange == ROW_MAJOR) { + for(dim_t d = NumDimensions; d > 0;) { + --d; + if(_coords[d] < region.extent(d) + region.offset(d) - 1) { + ++_coords[d]; + break; + } else + _coords[d] = region.offset(d); + } + } else { + for(dim_t d = 0; d < NumDimensions; ++d) { + if(_coords[d] < region.extent(d) + region.offset(d) - 1) { + ++_coords[d]; + break; + } else + _coords[d] = region.offset(d); + } + } + } else { + ++_region_number; + if(_region_number < bnd_elements.size()) { + _region_bound += bnd_elements[_region_number].size(); + _coords = _local_layout.coords(0, bnd_elements[_region_number]); + } + } + } + } else { + _coords = set_coords(_idx); + } + + if(MemoryArrange == ROW_MAJOR) { + _offset = _coords[0]; + for(dim_t d = 1; d < NumDimensions; ++d) + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } else { + _offset = _coords[NumDimensions - 1]; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } + } + _current_lmemory_addr = _local_memory + _offset; + if(Scope == StencilViewScope::INNER) { + for(auto i = 0; i < NumStencilPoints; ++i) + 
_stencil_mem_ptr[i] = _current_lmemory_addr + (*_stencil_offsets)[i]; + } else { + using signed_extent_t = typename std::make_signed::type; + std::array halo_coords{}; + std::array is_halo{}; + std::array indexes{}; + for(auto d = 0; d < NumDimensions; ++d) { + auto extent = _spec_views->local().extent(d); + + for(auto i = 0; i < NumStencilPoints; ++i) { + auto& halo_coord = halo_coords[i][d]; + halo_coord = _coords[d] + (*_stencil_spec)[i][d]; + if(halo_coord < 0) { + indexes[i] *= RegionCoords_t::REGION_INDEX_BASE; + is_halo[i] = true; + continue; + } + + if(halo_coord < static_cast(extent)) { + indexes[i] = 1 + indexes[i] * RegionCoords_t::REGION_INDEX_BASE; + continue; + } + + indexes[i] = 2 + indexes[i] * RegionCoords_t::REGION_INDEX_BASE; + is_halo[i] = true; + } + } + for(auto i = 0; i < NumStencilPoints; ++i) { + if(is_halo[i]) + _stencil_mem_ptr[i] = value_halo_at(indexes[i], halo_coords[i]); + else + _stencil_mem_ptr[i] = _current_lmemory_addr + (*_stencil_offsets)[i]; + } + } + } + + std::array set_coords(pattern_index_t idx) { + if(Scope == StencilViewScope::BOUNDARY) { + auto local_idx = idx; + for(const auto& region : _spec_views->boundary_elements()) { + _region_bound += region.size(); + if(local_idx < region.size()) { + return _local_layout.coords(local_idx, region); + } + ++_region_number; + local_idx -= region.size(); + } + DASH_ASSERT("idx >= size not implemented yet"); + return std::array{}; + } else { + if(_view_local.size() == 0) + return std::array{}; + else + return _local_layout.coords(idx, _view_local); + } + } + + ElementT* value_halo_at(region_index_t region_index, + ElementCoords_t& halo_coords) { + _halomemory->to_halo_mem_coords(region_index, halo_coords); + + return _halomemory->first_element_at(region_index) + + _halomemory->offset(region_index, halo_coords); + } + + void set_stencil_offsets(const StencilSpecT& stencil_spec) { + for(auto i = 0; i < NumStencilPoints; ++i) { + signed_pattern_size_t offset = 0; + if(MemoryArrange == 
ROW_MAJOR) { + offset = stencil_spec[i][0]; + for(dim_t d = 1; d < NumDimensions; ++d) + offset = stencil_spec[i][d] + offset * _local_layout.extent(d); + } else { + offset = stencil_spec[i][NumDimensions - 1]; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + offset = stencil_spec[i][d] + offset * _local_layout.extent(d); + } + } + (*_stencil_offsets)[i] = offset; + } + } + +private: + HaloMemory_t* _halomemory; + const StencilSpecT* _stencil_spec; + const StencilOffsets_t* _stencil_offsets; + const StencilSpecViews_t* _spec_views; + ElementT* _local_memory; + ViewSpec_t _view_local; + std::array _stencil_mem_ptr; + const LocalLayout_t _local_layout; + pattern_index_t _idx{ 0 }; + // extension of the fastest index dimension minus the halo extension + std::pair _ext_dim_reduced; + signed_pattern_size_t _offset; + pattern_index_t _region_bound{ 0 }; + size_t _region_number{ 0 }; + ElementCoords_t _coords; + ElementT* _current_lmemory_addr; +}; // class StencilIterator + +} // namespace halo + +} // namespace dash + +#endif // DASH__HALO__ITERATOR__STENCILITERATOR_H + diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index f3ad3a3fe..573046f29 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -9,6 +9,8 @@ using namespace dash; +using namespace dash::halo; + TEST_F(HaloTest, GlobalBoundarySpec) { using GlobBoundSpec_t = GlobalBoundarySpec<3>; @@ -255,8 +257,6 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) dash::fill(sum_halo.begin(), sum_halo.end(),0); auto* sum_local = sum_halo.lbegin(); - halo_wrapper.update_async(); - auto stencil_op = halo_wrapper.stencil_operator(stencil_spec); auto it_iend = stencil_op.iend(); @@ -267,8 +267,7 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) *sum_local += *it; } - halo_wrapper.wait(); - + halo_wrapper.update(); auto it_bend = stencil_op.bend(); for(auto it = stencil_op.bbegin(); it != it_bend; ++it) { for(auto i = 0; i < stencil_spec.num_stencil_points(); ++i) @@ -1177,19 
+1176,21 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMultiStencil) } } } - for(auto i = 3; i < ext_per_dim - 3; ++i) { - for(auto j = 3; j < ext_per_dim_check - 3; ++j) { - for(auto k = 3; k < ext_per_dim_check - 3; ++k) { + for(auto j = 3; j < ext_per_dim_check - 3; ++j) { + for(auto k = 3; k < ext_per_dim_check - 3; ++k) { + for(auto i = 2; i < ext_per_dim - 2; ++i) sum_check_spec_1 += matrix_check[i][j][k] + matrix_check[i-2][j][k] + matrix_check[i+2][j][k] + matrix_check[i][j-2][k] + matrix_check[i][j+2][k] + matrix_check[i][j][k-2] + matrix_check[i][j][k+2]; + for(auto i = 1; i < ext_per_dim - 1; ++i) sum_check_spec_2 += matrix_check[i-1][j-1][k-1] + matrix_check[i-1][j-1][k+1] + matrix_check[i-1][j+1][k-1] + matrix_check[i-1][j+1][k+1] + matrix_check[i][j][k] + matrix_check[i+1][j-1][k-1] + matrix_check[i+1][j-1][k+1] + matrix_check[i+1][j+1][k-1] + matrix_check[i+1][j+1][k+1]; + for(auto i = 3; i < ext_per_dim - 3; ++i) sum_check_spec_3 += matrix_check[i-3][j-3][k-3] + matrix_check[i-2][j-2][k-2] + matrix_check[i-1][j-1][k-1] + matrix_check[i-3][j-3][k+3] + matrix_check[i-2][j-2][k+2] + matrix_check[i-1][j-1][k+1] + @@ -1200,7 +1201,6 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMultiStencil) matrix_check[i+3][j-3][k+3] + matrix_check[i+2][j-2][k+2] + matrix_check[i+1][j-1][k+1] + matrix_check[i+3][j+3][k-3] + matrix_check[i+2][j+2][k-2] + matrix_check[i+1][j+1][k-1] + matrix_check[i+3][j+3][k+3] + matrix_check[i+2][j+2][k+2] + matrix_check[i+1][j+1][k+1]; - } } } From 4507c55c55de3d46506b38f28cf0401252874393 Mon Sep 17 00:00:00 2001 From: Denis Huenich Date: Tue, 17 Apr 2018 16:08:58 +0200 Subject: [PATCH 2/6] overloaded streaming operator fo most classes in Halo.h, changed views in haloblock to local offsets, stencil operator now offers method for region view specific boundary iterators --- dash/include/dash/halo/Halo.h | 279 +++++++++++++++--- dash/include/dash/halo/HaloMatrixWrapper.h | 18 +- dash/include/dash/halo/StencilOperator.h | 58 +++- 
.../dash/halo/iterator/StencilIterator.h | 209 ++++++++----- dash/test/halo/HaloTest.cc | 43 ++- 5 files changed, 459 insertions(+), 148 deletions(-) diff --git a/dash/include/dash/halo/Halo.h b/dash/include/dash/halo/Halo.h index 94032143a..e1143e1a2 100644 --- a/dash/include/dash/halo/Halo.h +++ b/dash/include/dash/halo/Halo.h @@ -85,6 +85,22 @@ class StencilPoint : public Dimensional { CoeffT _coefficient = 1.0; }; // StencilPoint +template +std::ostream& operator<<( std::ostream & os, + const StencilPoint& stencil_point) { + os << "dash::halo::StencilPoint<" << NumDimensions << ">" + << "(coefficient = " << stencil_point.coefficient << " - points: "; + for (auto d = 0; d < NumDimensions; ++d) { + if (d > 0) { + os << ","; + } + os << stencil_point[d]; + } + os << ")"; + + return os; +} + /** * A collection of stencil points (\ref Stencil) * e.g. StencilSpec, 2,2>({StencilPoint<2>(-1,0), @@ -201,6 +217,22 @@ class StencilSpec { StencilArray_t _specs{}; }; // StencilSpec +template +std::ostream& operator<<(std::ostream & os, + const StencilSpec & specs) { + os << "dash::halo::StencilSpec<" << NumStencilPoints << ">" + << "("; + for (auto i = 0; i < NumStencilPoints; ++i) { + if (i > 0) { + os << ","; + } + os << specs[i]; + } + os << ")"; + + return os; +} + /** * Global boundary Halo properties */ @@ -213,6 +245,17 @@ enum class BoundaryProp : uint8_t { CUSTOM }; +static std::ostream& operator<<( std::ostream & os, const BoundaryProp& prop) { + if(prop == BoundaryProp::NONE) + os << "NONE"; + else if(prop == BoundaryProp::CYCLIC) + os << "CYCLIC"; + else + os << "CUSTOM"; + + return os; +} + /** * Global boundary property specification for every dimension */ @@ -241,6 +284,39 @@ class GlobalBoundarySpec : public Dimensional { : Base_t::Dimensional(value, values...) 
{} }; // GlobalBoundarySpec +template +std::ostream& operator<<( std::ostream & os, + const GlobalBoundarySpec& spec) { + os << "dash::halo::GlobalBoundarySpec<" << NumDimensions << ">" + << "("; + for (auto d = 0; d < NumDimensions; ++d) { + if (d > 0) { + os << ","; + } + os << spec[d]; + } + os << ")"; + + return os; +} + +/** + * Position of a \ref Region in one dimension relating to the center + */ +enum class RegionPos : bool { + PRE, + POST +}; + +static std::ostream& operator<<( std::ostream & os, const RegionPos& pos) { + if(pos == RegionPos::PRE) + os << "PRE"; + else + os << "POST"; + + return os; +} + /** * N-Dimensional region coordinates and associated indices for all possible * Halo/Boundary regions of a \ref HaloBlock. The center (all values = 1) is the @@ -319,6 +395,19 @@ class RegionCoords : public Dimensional { */ constexpr region_index_t index() const { return _index; } + static region_index_t index(dim_t dim, RegionPos pos) { + region_coord_t coord = (pos == RegionPos::PRE) ? 
0 : 2; + + region_index_t index = 0; + for(dim_t d = 0; d < NumDimensions; ++d) + if(dim == d) + index = coord + index * REGION_INDEX_BASE; + else + index= 1 + index * REGION_INDEX_BASE; + + return index; + } + /** * Returns the region index for a given \ref RegionCoords * @@ -487,11 +576,11 @@ class RegionSpec : public Dimensional { template std::ostream& operator<<(std::ostream& os, - const RegionSpec& hrs) { - os << "dash::RegionSpec<" << NumDimensions << ">(" << (uint32_t) hrs[0]; + const RegionSpec& rs) { + os << "dash::RegionSpec<" << NumDimensions << ">(" << (uint32_t) rs[0]; for(auto i = 1; i < NumDimensions; ++i) - os << "," << (uint32_t) hrs[i]; - os << "), Extent:" << hrs.extent(); + os << "," << (uint32_t) rs[i]; + os << "), Extent:" << rs.extent(); return os; } @@ -585,11 +674,11 @@ class HaloSpec { template void set_region_spec(const StencilPointT& stencil) { auto index = RegionSpec_t::index(stencil); - auto max = stencil.max(); if(_specs[index].extent() == 0) ++_num_regions; + auto max = stencil.max(); if(max > _specs[index].extent()) _specs[index] = RegionSpec_t(index, max); } @@ -619,6 +708,28 @@ class HaloSpec { region_size_t _num_regions{ 0 }; }; // HaloSpec +template +std::ostream& operator<<( std::ostream & os, const HaloSpec& hs) +{ + std::ostringstream ss; + ss << "dash::halo::HaloSpec<" << NumDimensions << ">("; + bool begin = true; + for(const auto& region_spec : hs.specs()) { + if(region_spec.extent() > 0) { + if(begin) { + ss << region_spec; + begin = false; + } else { + ss << "," << region_spec; + } + } + } + ss << "; number region: " << hs.num_regions(); + ss << ")"; + + return operator<<(os, ss.str()); +} + /** * Iterator to iterate over all region elements defined by \ref Region */ @@ -933,8 +1044,11 @@ auto distance( * Provides \ref RegionIter and some region metadata like \ref RegionSpec, * size etc. 
*/ -template +template class Region { +private: + static constexpr auto NumDimensions = PatternT::ndim(); + public: using iterator = RegionIter; using const_iterator = const iterator; @@ -962,7 +1076,7 @@ class Region { const RegionSpec_t& spec() const { return _region_spec; } - const ViewSpec_t& region() const { return _region; } + const ViewSpec_t& view() const { return _region; } constexpr pattern_size_t size() const { return _region.size(); } @@ -988,6 +1102,25 @@ class Region { iterator _end; }; // Region +template +std::ostream& operator<<( + std::ostream & os, + const Region & region) { + std::ostringstream ss; + ss << "dash::halo::Region<" << typeid(ElementT).name() << ">" + //<< "( view: " << region.view() + //<< "; region spec: " << region.spec() + //<< "; global borders: " << region.border() + //<< "; border region: " << region.is_border_region() + //<< "; custom region: " << region.is_custom_region() + //<< "; begin iterator: " << region.begin() + //<< "; end iterator: " << region.begin() + << ")"; + + return operator<<(os, ss.str()); +} + + /** * Takes the local part of the NArray and builds halo and * boundary regions. 
@@ -1000,7 +1133,7 @@ class HaloBlock { using Self_t = HaloBlock; using pattern_index_t = typename PatternT::index_type; using RegionSpec_t = RegionSpec; - using Region_t = Region; + using Region_t = Region; using RegionCoords_t = RegionCoords; using region_extent_t = typename RegionSpec_t::region_extent_t; @@ -1012,6 +1145,7 @@ class HaloBlock { using GlobBoundSpec_t = GlobalBoundarySpec; using pattern_size_t = typename PatternT::size_type; using ViewSpec_t = typename PatternT::viewspec_type; + using BoundaryViews_t = std::vector; using HaloSpec_t = HaloSpec; using RegionVector_t = std::vector; using region_index_t = typename RegionSpec_t::region_index_t; @@ -1028,9 +1162,11 @@ class HaloBlock { const HaloSpec_t& halo_reg_spec, const GlobBoundSpec_t& bound_spec = GlobBoundSpec_t{}) : _globmem(globmem), _pattern(pattern), _view(view), - _halo_reg_spec(halo_reg_spec) { - _view_inner = view; - _view_inner_with_boundaries = view; + _halo_reg_spec(halo_reg_spec), _view_local(_view.extents()) { + + // setup local views + _view_inner = _view_local; + _view_inner_with_boundaries = _view_local; // TODO put functionallity to HaloSpec _halo_regions.reserve(_halo_reg_spec.num_regions()); @@ -1052,7 +1188,7 @@ class HaloBlock { auto bnd_region_offsets = view.offsets(); auto bnd_region_extents = view.extents(); - for(auto d(0); d < NumDimensions; ++d) { + for(dim_t d = 0; d < NumDimensions; ++d) { if(spec[d] == 1) continue; @@ -1129,34 +1265,34 @@ class HaloBlock { * Setup for the non duplicate boundary elements and the views: inner, * boundary and inner + boundary */ - for(auto d = 0; d < NumDimensions; ++d) { - const auto view_offset = view.offset(d); - const auto view_extent = view.extent(d); + for(dim_t d = 0; d < NumDimensions; ++d) { + const auto global_offset = view.offset(d); + const auto view_extent = _view_local.extent(d); auto bnd_elem_offsets = _view.offsets(); - auto bnd_elem_extents = _view.extents(); + auto bnd_elem_extents = _view_local.extents(); 
bnd_elem_extents[d] = _halo_extents_max[d].first; for(auto d_tmp = 0; d_tmp < d; ++d_tmp) { - bnd_elem_offsets[d_tmp] += _halo_extents_max[d_tmp].first; + bnd_elem_offsets[d_tmp] -= _view.offset(d_tmp) - _halo_extents_max[d_tmp].first; bnd_elem_extents[d_tmp] -= _halo_extents_max[d_tmp].first + _halo_extents_max[d_tmp].second; } _view_inner.resize_dim( - d, view_offset + _halo_extents_max[d].first, + d, _halo_extents_max[d].first, view_extent - _halo_extents_max[d].first - _halo_extents_max[d].second); - if(bound_spec[d] == BoundaryProp::NONE) { - auto safe_offset = view_offset; + if(bound_spec[d] == BoundaryProp::NONE ) { + auto safe_offset = global_offset; auto safe_extent = view_extent; - if(view_offset < _halo_extents_max[d].first) { + if(global_offset < _halo_extents_max[d].first) { safe_offset = _halo_extents_max[d].first; - safe_extent -= _halo_extents_max[d].first - view_offset; + safe_extent -= _halo_extents_max[d].first - global_offset; } else { + bnd_elem_offsets[d] -= global_offset; push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, bound_spec); } - auto check_extent = - view_offset + view_extent + _halo_extents_max[d].second; + auto check_extent = global_offset + view_extent + _halo_extents_max[d].second; if(check_extent > _pattern.extent(d)) { safe_extent -= check_extent - _pattern.extent(d); } else { @@ -1165,8 +1301,9 @@ class HaloBlock { push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, bound_spec); } - _view_inner_with_boundaries.resize_dim(d, safe_offset, safe_extent); + _view_inner_with_boundaries.resize_dim(d, safe_offset - global_offset, safe_extent); } else { + bnd_elem_offsets[d] -= global_offset; push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, bound_spec); bnd_elem_offsets[d] += view_extent - _halo_extents_max[d].first; @@ -1202,7 +1339,11 @@ class HaloBlock { const GlobMem_t& globmem() const { return _globmem; } /** - * Returns a specific halo region + * Returns used 
\ref HaloSpec + */ + const HaloSpec_t& halo_spec() const { return _halo_reg_spec; } + /** + * Returns a specific halo region and nullptr if no region exists */ const Region_t* halo_region(const region_index_t index) const { return _halo_reg_mapping[index]; @@ -1226,7 +1367,7 @@ class HaloBlock { const HaloExtsMax_t& halo_extension_max() const { return _halo_extents_max; } /** - * Returns a specific region + * Returns a specific region and nullptr if no region exists */ const Region_t* boundary_region(const region_index_t index) const { return _boundary_reg_mapping[index]; @@ -1239,12 +1380,17 @@ class HaloBlock { const RegionVector_t& boundary_regions() const { return _boundary_regions; } /** - * Returns the initial \ref ViewSpec + * Returns the initial global \ref ViewSpec */ const ViewSpec_t& view() const { return _view; } /** - * Returns a \ref ViewSpec that combines the boundary and inner view + * Returns the initial local \ref ViewSpec + */ + const ViewSpec_t& view_local() const { return _view_local; } + + /** + * Returns a local \ref ViewSpec that combines the boundary and inner view */ const ViewSpec_t& view_inner_with_boundaries() const { return _view_inner_with_boundaries; @@ -1257,10 +1403,11 @@ class HaloBlock { const ViewSpec_t& view_inner() const { return _view_inner; } /** - * Returns all boundary elements. No duplicates of elements included. + * Returns a set of local views that contains all boundary elements. + * No duplicates of elements included. 
*/ - const std::vector& boundary_elements() const { - return _boundary_elements; + const BoundaryViews_t& boundary_views() const { + return _boundary_views; } /** @@ -1306,26 +1453,26 @@ class HaloBlock { std::array& extents, const HaloExtsMax_t& halo_exts_max, const GlobBoundSpec_t& bound_spec) { + auto tmp = offsets; for(auto d_tmp = dim + 1; d_tmp < NumDimensions; ++d_tmp) { if(bound_spec[d_tmp] == BoundaryProp::NONE) { if(offsets[d_tmp] < halo_exts_max[d_tmp].first) { offsets[d_tmp] = halo_exts_max[d_tmp].first; + tmp[d_tmp] = halo_exts_max[d_tmp].first; extents[d_tmp] -= halo_exts_max[d_tmp].first; - if((offsets[d_tmp] + extents[d_tmp] + halo_exts_max[d_tmp].second) - > _pattern.extent(d_tmp)) - extents[d_tmp] -= halo_exts_max[d_tmp].second; - continue; } auto check_extent_tmp = offsets[d_tmp] + extents[d_tmp] + halo_exts_max[d_tmp].second; - if(check_extent_tmp > _pattern.extent(d_tmp)) { + if(check_extent_tmp > _pattern.extent(d_tmp)) extents[d_tmp] -= halo_exts_max[d_tmp].second; - } } + + tmp[d_tmp] -= _view.offset(d_tmp); } - ViewSpec_t boundary_next(offsets, extents); + + ViewSpec_t boundary_next(tmp, extents); _size_bnd_elems += boundary_next.size(); - _boundary_elements.push_back(std::move(boundary_next)); + _boundary_views.push_back(std::move(boundary_next)); } private: @@ -1337,6 +1484,8 @@ class HaloBlock { const HaloSpec_t& _halo_reg_spec; + const ViewSpec_t _view_local; + ViewSpec_t _view_inner_with_boundaries; ViewSpec_t _view_inner; @@ -1349,7 +1498,7 @@ class HaloBlock { std::array _boundary_reg_mapping{}; - std::vector _boundary_elements; + BoundaryViews_t _boundary_views; pattern_size_t _size_bnd_elems = 0; @@ -1358,6 +1507,48 @@ class HaloBlock { HaloExtsMax_t _halo_extents_max{}; }; // class HaloBlock +template +std::ostream& operator<<( + std::ostream & os, + const HaloBlock& haloblock) +{ + std::ostringstream ss; + bool begin = true; + ss << "dash::halo::HaloBlock<" << typeid(ElementT).name() << ">(" + << "view global: " << 
haloblock.view() + << "; halo spec: " << haloblock.halo_spec() + << "; view local: " << haloblock.view_local() + << "; view inner: " << haloblock.view_inner() + << "; view inner_bnd: " << haloblock.view_inner_with_boundaries() + << "; halo regions { "; + for(const auto& region : haloblock.halo_regions()) { + if(begin) { + ss << region; + begin = false; + } else { + ss << "," << region; + } + } + ss << " } " + << "; halo elems: " << haloblock.halo_size() + << "; boundary regions: { "; + for(const auto& region : haloblock.boundary_regions()) { + if(begin) { + ss << region; + begin = false; + } else { + ss << "," << region; + } + } + ss << " } " + << "; boundary views: " << haloblock.boundary_views() + << "; boundary elems: " << haloblock.boundary_size() + << ")"; + + return operator<<(os, ss.str()); +} + + /** * Mangages the memory for all halo regions provided by the given * \ref HaloBlock @@ -1424,7 +1615,7 @@ class HaloMemory { bool to_halo_mem_coords_check(const region_index_t region_index, ElementCoords_t& coords) const { const auto& extents = - _haloblock.halo_region(region_index)->region().extents(); + _haloblock.halo_region(region_index)->view().extents(); for(auto d = 0; d < NumDimensions; ++d) { if(coords[d] < 0) coords[d] += extents[d]; @@ -1444,7 +1635,7 @@ class HaloMemory { void to_halo_mem_coords(const region_index_t region_index, ElementCoords_t& coords) const { const auto& extents = - _haloblock.halo_region(region_index)->region().extents(); + _haloblock.halo_region(region_index)->view().extents(); for(auto d = 0; d < NumDimensions; ++d) { if(coords[d] < 0) { coords[d] += extents[d]; @@ -1463,7 +1654,7 @@ class HaloMemory { pattern_size_t offset(const region_index_t region_index, const ElementCoords_t& coords) const { const auto& extents = - _haloblock.halo_region(region_index)->region().extents(); + _haloblock.halo_region(region_index)->view().extents(); pattern_size_t off = 0; if(MemoryArrange == ROW_MAJOR) { off = coords[0]; diff --git 
a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index 23bb9bad6..e60a1df84 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -70,7 +70,7 @@ class HaloMatrixWrapper { using pattern_size_t = typename Pattern_t::size_type; using signed_pattern_size_t = typename std::make_signed::type; using HaloSpec_t = HaloSpec; - using Region_t = Region; + using Region_t = Region; public: /** @@ -81,10 +81,10 @@ class HaloMatrixWrapper { HaloMatrixWrapper(MatrixT& matrix, const GlobBoundSpec_t& cycle_spec, const StencilSpecT&... stencil_spec) : _matrix(matrix), _cycle_spec(cycle_spec), _halo_spec(stencil_spec...), - _view_local(matrix.local.extents()), _view_global(matrix.local.offsets(), matrix.local.extents()), _haloblock(matrix.begin().globmem(), matrix.pattern(), _view_global, _halo_spec, cycle_spec), + _view_local(_haloblock.view_local()), _halomemory(_haloblock) { for(const auto& region : _haloblock.halo_regions()) { if(region.size() == 0) @@ -100,7 +100,7 @@ class HaloMatrixWrapper { if(MemoryArrange == ROW_MAJOR) { if(level == 1) { //|| (level == 2 && region.regionSpec()[0] != 1)) { for(auto i = rel_dim - 1; i < NumDimensions; ++i) - num_elems_block *= region.region().extent(i); + num_elems_block *= region.view().extent(i); size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); @@ -129,7 +129,7 @@ class HaloMatrixWrapper { } // TODO more optimizations else { - num_elems_block *= region.region().extent(NumDimensions - 1); + num_elems_block *= region.view().extent(NumDimensions - 1); size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); num_blocks = region_size / num_elems_block; @@ -167,7 +167,7 @@ class HaloMatrixWrapper { if(level == 1) { //|| (level == 2 && // region.regionSpec()[NumDimensions - 1] != 1)) { for(auto i = 0; i < rel_dim; ++i) - num_elems_block *= region.region().extent(i); + 
num_elems_block *= region.view().extent(i); size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); @@ -196,7 +196,7 @@ class HaloMatrixWrapper { } // TODO more optimizations else { - num_elems_block *= region.region().extent(0); + num_elems_block *= region.view().extent(0); size_t region_size = region.size(); auto ds_num_elems_block = dart_storage(num_elems_block); num_blocks = region_size / num_elems_block; @@ -233,7 +233,7 @@ class HaloMatrixWrapper { DART_HANDLE_NULL })); } - num_elems_block = region.region().extent(0); + num_elems_block = region.view().extent(0); } } } @@ -376,7 +376,7 @@ class HaloMatrixWrapper { auto* pos_ptr = _halomemory.first_element_at(region.index()); const auto& spec = region.spec(); std::array coords_offset{}; - const auto& reg_ext = region.region().extents(); + const auto& reg_ext = region.view().extents(); for(auto d = 0; d < NumDimensions; ++d) { if(spec[d] == 0) { coords_offset[d] -= reg_ext[d]; @@ -471,9 +471,9 @@ class HaloMatrixWrapper { MatrixT& _matrix; const GlobBoundSpec_t _cycle_spec; const HaloSpec_t _halo_spec; - const ViewSpec_t _view_local; const ViewSpec_t _view_global; const HaloBlock_t _haloblock; + const ViewSpec_t& _view_local; HaloMemory_t _halomemory; std::map _region_data; std::vector _dart_types; diff --git a/dash/include/dash/halo/StencilOperator.h b/dash/include/dash/halo/StencilOperator.h index 274a03587..23e871be1 100644 --- a/dash/include/dash/halo/StencilOperator.h +++ b/dash/include/dash/halo/StencilOperator.h @@ -7,6 +7,7 @@ namespace dash { namespace halo { + /** * The StencilOperator provides stencil specific iterator and functions for * a given \ref HaloBlock and HaloMemory. 
@@ -69,6 +70,8 @@ class StencilOperator { using StencilSpecViews_t = StencilSpecificViews; + using region_index_t = typename RegionSpec::region_index_t; + public: /** * Constructor that takes a \ref HaloBlock, a \ref HaloMemory, @@ -82,14 +85,17 @@ class StencilOperator { _stencil_offsets(set_stencil_offsets()), _local_memory((ElementT*) _halo_block->globmem().lbegin()), _spec_views(*_halo_block, _stencil_spec, _view_local), - _begin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, 0), - _end(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, - _spec_views.inner_with_boundaries().size()), - _ibegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, 0), - _iend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, - _spec_views.inner().size()), - _bbegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, 0), - _bend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, &_spec_views, + _begin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, + _spec_views.inner_with_boundaries(), *_view_local, 0), + _end(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, + _spec_views.inner_with_boundaries(), *_view_local, + _spec_views.inner_with_boundaries().size()), + _ibegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, + _spec_views.inner(), *_view_local, 0), + _iend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, + _spec_views.inner(), *_view_local, _spec_views.inner().size()), + _bbegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), 0), + _bend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), _spec_views.boundary_size()) {} StencilOperator() = delete; @@ -132,6 +138,40 @@ class StencilOperator { /// returns the end const iterator for all boundary elements 
const_iterator_bnd bend() const noexcept { return _bend; } + std::pair boundary_iterator_at(dim_t dim, RegionPos pos) { + DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to great"); + const auto& bnd_views = _spec_views.boundary_views(); + pattern_size_t offset = 0; + auto it_views = std::begin(bnd_views); + for(dim_t d = 0; d < dim; ++d, ++it_views) + offset += it_views->size() + (++it_views)->size(); + + if(pos == RegionPos::POST) { + offset += it_views->size(); + ++it_views; + } + + //iterator_bnd it_begin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), offset); + //iterator_bnd it_end(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), offset + it_views->size()); + auto it_begin = _bbegin + offset; + + return std::make_pair(it_begin, it_begin + it_views->size()); + } + + /* + /// returns the begin iterator for all boundary elements + iterator_bnd bbegin() noexcept { return _bbegin; } + + /// returns the begin const iterator for all boundary elements + const_iterator_bnd bbegin() const noexcept { return _bbegin; } + + /// returns the end iterator for all boundary elements + iterator_bnd bend() noexcept { return _bend; } + + /// returns the end const iterator for all boundary elements + const_iterator_bnd bend() const noexcept { return _bend; } +*/ + /** * Returns the \ref HaloBlock */ @@ -317,7 +357,7 @@ class StencilOperator { private: const HaloBlock_t* _halo_block; HaloMemory_t* _halo_memory; - const StencilSpecT _stencil_spec; + const StencilSpecT _stencil_spec; const ViewSpec_t* _view_local; StencilOffsets_t _stencil_offsets; ElementT* _local_memory; diff --git a/dash/include/dash/halo/iterator/StencilIterator.h b/dash/include/dash/halo/iterator/StencilIterator.h index ca4ac2f62..5422db8ad 100644 --- a/dash/include/dash/halo/iterator/StencilIterator.h +++ b/dash/include/dash/halo/iterator/StencilIterator.h @@ -20,7 +20,8 @@ class 
StencilSpecificViews { using Pattern_t = typename HaloBlockT::Pattern_t; public: - using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using BoundaryViews_t = typename HaloBlockT::BoundaryViews_t; using pattern_size_t = typename Pattern_t::size_type; public: @@ -28,7 +29,6 @@ class StencilSpecificViews { const StencilSpecT& stencil_spec, const ViewSpec_t* view_local) : _view_local(view_local) { - auto& off_glob = haloblock.view().offsets(); auto minmax_dist = stencil_spec.minmax_distances(); for(auto& dist : minmax_dist) dist.first = std::abs(dist.first); @@ -38,8 +38,6 @@ class StencilSpecificViews { auto inner_bound_off = haloblock.view_inner_with_boundaries().offsets(); auto inner_bound_ext = haloblock.view_inner_with_boundaries().extents(); for(auto d = 0; d < NumDimensions; ++d) { - inner_off[d] -= off_glob[d]; - inner_bound_off[d] -= off_glob[d]; resize_offset(inner_off[d], inner_ext[d], minmax_dist[d].first); resize_extent(inner_off[d], inner_ext[d],_view_local->extent(d), minmax_dist[d].second); resize_offset(inner_bound_off[d], inner_bound_ext[d], minmax_dist[d].first); @@ -48,32 +46,31 @@ class StencilSpecificViews { _view_inner = ViewSpec_t(inner_off, inner_ext); _view_inner_with_boundaries = ViewSpec_t(inner_bound_off, inner_bound_ext); - const auto& bnd_elems = haloblock.boundary_elements(); - const auto& halo_ext_max = haloblock.halo_extension_max(); - _boundary_elements.reserve(bnd_elems.size()); - for(auto& view : bnd_elems) { + using RegionCoords_t = RegionCoords; + using region_index_t = typename RegionCoords_t::region_index_t; - auto view_off = view.offsets(); - auto view_ext = view.extents(); - for(auto d = 0; d < NumDimensions; ++d) { - view_off[d] -= off_glob[d]; - if(view_off[d] < halo_ext_max[d].first && view_ext[d] == halo_ext_max[d].first) { - view_ext[d] = minmax_dist[d].first; - continue; - } - else if(view_ext[d] == halo_ext_max[d].second) { - view_ext[d] = minmax_dist[d].second; - 
view_off[d] += halo_ext_max[d].second - minmax_dist[d].second; - } - else { - resize_offset(view_off[d], view_ext[d], minmax_dist[d].first); - resize_extent(view_off[d], view_ext[d], _view_local->extent(d), minmax_dist[d].second); - } + const auto& bnd_elems = haloblock.boundary_views(); + const auto& halo_ext_max = haloblock.halo_extension_max(); + _boundary_views.reserve(NumDimensions * 2); + auto it_views = std::begin(bnd_elems); + + for(dim_t d = 0; d < NumDimensions; ++d) { + region_index_t index = RegionCoords_t::index(d, RegionPos::PRE); + auto* region = haloblock.boundary_region(index); + if(region == nullptr || (region != nullptr && region->size() == 0)) + _boundary_views.push_back(ViewSpec_t()); + else { + push_boundary_views(*it_views, halo_ext_max, minmax_dist); + ++it_views; + } + index = RegionCoords_t::index(d, RegionPos::POST); + region = haloblock.boundary_region(index); + if(region == nullptr || (region != nullptr && region->size() == 0)) + _boundary_views.push_back(ViewSpec_t()); + else { + push_boundary_views(*it_views, halo_ext_max, minmax_dist); + ++it_views; } - ViewSpec_t tmp(view_off, view_ext); - - _size_bnd_elems += tmp.size(); - _boundary_elements.push_back(std::move(tmp)); } } @@ -85,13 +82,33 @@ class StencilSpecificViews { return _view_inner_with_boundaries; } - const std::vector& boundary_elements() const { - return _boundary_elements; + const BoundaryViews_t& boundary_views() const { + return _boundary_views; } pattern_size_t boundary_size() const { return _size_bnd_elems;} private: + template + void push_boundary_views(const ViewSpec_t& view, const MaxExtT& max_ext, const MaxDistT& max_dist) { + auto view_off = view.offsets(); + auto view_ext = view.extents(); + for(auto d = 0; d < NumDimensions; ++d) { + if(view_off[d] < max_ext[d].first && view_ext[d] == max_ext[d].first) { + view_ext[d] = max_dist[d].first; + } else if(view_ext[d] == max_ext[d].second) { + view_ext[d] = max_dist[d].second; + view_off[d] += max_ext[d].second - 
max_dist[d].second; + } else { + resize_offset(view_off[d], view_ext[d], max_dist[d].first); + resize_extent(view_off[d], view_ext[d], _view_local->extent(d), max_dist[d].second); + } + } + ViewSpec_t tmp(view_off, view_ext); + _size_bnd_elems += tmp.size(); + _boundary_views.push_back(std::move(tmp)); + } + template void resize_offset(OffT& offset, ExtT& extent, MaxT max) { if(offset > max) { @@ -111,10 +128,26 @@ class StencilSpecificViews { const ViewSpec_t* _view_local; ViewSpec_t _view_inner; ViewSpec_t _view_inner_with_boundaries; - std::vector _boundary_elements; + BoundaryViews_t _boundary_views; pattern_size_t _size_bnd_elems = 0; }; +template +std::ostream& operator<<( + std::ostream & os, + const StencilSpecificViews & stencil_views) +{ + std::ostringstream ss; + ss << "dash::StencilSpecificViews" + << "(local: " << stencil_views.local() + << "; inner: " << stencil_views.inner() + << "; inner_bound: " << stencil_views.inner_with_boundaries() + << "; boundary_views: " << stencil_views.boundary_views() + << "; boundary elems: " << stencil_views.boundary_size() + << ")"; + + return operator<<(os, ss.str()); +} /* * Iterator with stencil points and halo access \see HaloStencilOperator. 
@@ -134,6 +167,7 @@ class StencilIterator { using pattern_size_t = typename PatternT::size_type; using signed_pattern_size_t = typename std::make_signed::type; using RegionCoords_t = RegionCoords; + using HaloBlock_t = HaloBlock; public: // Iterator traits @@ -143,7 +177,6 @@ class StencilIterator { using pointer = ElementT*; using reference = ElementT&; - using HaloBlock_t = HaloBlock; using HaloMemory_t = HaloMemory; using pattern_index_t = typename PatternT::index_type; using region_index_t = typename RegionCoords_t::region_index_t; @@ -153,6 +186,7 @@ class StencilIterator { using ElementCoords_t = std::array; using StencilOffsets_t = std::array; using StencilSpecViews_t = StencilSpecificViews; + using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; public: /** @@ -168,40 +202,53 @@ class StencilIterator { StencilIterator(ElementT* local_memory, HaloMemory_t* halomemory, const StencilSpecT* stencil_spec, const StencilOffsets_t* stencil_offsets, - const StencilSpecViews_t* spec_views, + const ViewSpec_t& view, + const ViewSpec_t& view_local, pattern_index_t idx) : _halomemory(halomemory), _stencil_spec(stencil_spec), - _stencil_offsets(stencil_offsets), _spec_views(spec_views), + _stencil_offsets(stencil_offsets), _view(view), _local_memory(local_memory), _idx(idx), - _local_layout(spec_views->local().extents()) { - if(Scope == StencilViewScope::INNER) - _view_local = ViewSpec_t(spec_views->inner()); - else if(Scope == StencilViewScope::ALL) - _view_local = ViewSpec_t(spec_views->inner_with_boundaries()); - else - _view_local = ViewSpec_t(spec_views->local()); - - pattern_index_t _size = 0; - if(Scope == StencilViewScope::BOUNDARY) - _size = spec_views->boundary_size(); - else - _size = _view_local.size(); - - if(_idx < _size) + _local_layout(view_local.extents()) { + + if(_idx < _view.size()) set_coords(); const auto ext_max = stencil_spec->minmax_distances(FastestDimension); if(Scope == StencilViewScope::INNER) { _ext_dim_reduced = 
std::make_pair( - _view_local.offset(FastestDimension), + _view.offset(FastestDimension), _local_layout.extent(FastestDimension) - ext_max.second - 1); } else { _ext_dim_reduced = - std::make_pair(std::abs(ext_max.first), _view_local.extent(FastestDimension) + std::make_pair(std::abs(ext_max.first), _view.extent(FastestDimension) - ext_max.second - 1); } } + StencilIterator(ElementT* local_memory, HaloMemory_t* halomemory, + const StencilSpecT* stencil_spec, + const StencilOffsets_t* stencil_offsets, + const ViewSpec_t& view_local, + const BoundaryViews_t& boundary_views, + pattern_index_t idx) + : _halomemory(halomemory), _stencil_spec(stencil_spec), + _stencil_offsets(stencil_offsets), _boundary_views(boundary_views), + _view(view_local.extents()), _local_memory(local_memory), _idx(idx), + _local_layout(view_local.extents()) { + + pattern_index_t size = 0; + for(const auto& view : boundary_views) + size += view.size(); + if(_idx < size) + set_coords(); + + const auto ext_max = stencil_spec->minmax_distances(FastestDimension); + + _ext_dim_reduced = + std::make_pair(std::abs(ext_max.first), _view.extent(FastestDimension) + - ext_max.second - 1); + } + /** * Copy constructor. 
*/ @@ -253,7 +300,7 @@ class StencilIterator { const auto& stencil = (*_stencil_spec)[index_stencil]; for(auto d = 0; d < NumDimensions; ++d) { halo_coords[d] += stencil[d]; - if(halo_coords[d] < 0 || halo_coords[d] >= _spec_views->local().extent(d)) + if(halo_coords[d] < 0 || halo_coords[d] >= _local_layout.extent(d)) return true; } @@ -392,15 +439,15 @@ class StencilIterator { return true; } #endif - if(&_view_local == &(other._view_local) - || _view_local == other._view_local) { + if(&_view == &(other._view) + || _view == other._view) { return gidx_cmp(_idx, other._idx); } // TODO not the best solution return false; } - /*void set_view_local(const ViewSpec_t& view_tmp) { + /*void set_view(const ViewSpec_t& view_tmp) { if(Scope == StencilViewScope::BOUNDARY) { const auto& bnd_elems = _haloblock.boundary_elements(); _bnd_elements.reserve(bnd_elems.size()); @@ -413,14 +460,14 @@ class StencilIterator { _bnd_elements.push_back(ViewSpec_t(off, region.extents())); } - _view_local = ViewSpec_t(view_tmp.extents()); + _view = ViewSpec_t(view_tmp.extents()); } else { const auto& view_offsets = _haloblock.view().offsets(); auto off = view_tmp.offsets(); for(int d = 0; d < NumDimensions; ++d) off[d] -= view_offsets[d]; - _view_local = ViewSpec_t(off, view_tmp.extents()); + _view = ViewSpec_t(off, view_tmp.extents()); } }*/ @@ -444,19 +491,19 @@ class StencilIterator { if(MemoryArrange == ROW_MAJOR) { for(dim_t d = NumDimensions; d > 0;) { --d; - if(_coords[d] < _view_local.extent(d) + _view_local.offset(d) - 1) { + if(_coords[d] < _view.extent(d) + _view.offset(d) - 1) { ++_coords[d]; break; } else - _coords[d] = _view_local.offset(d); + _coords[d] = _view.offset(d); } } else { for(dim_t d = 0; d < NumDimensions; ++d) { - if(_coords[d] < _view_local.extent(d) + _view_local.offset(d) - 1) { + if(_coords[d] < _view.extent(d) + _view.offset(d) - 1) { ++_coords[d]; break; } else - _coords[d] = _view_local.offset(d); + _coords[d] = _view.offset(d); } } if(MemoryArrange == 
ROW_MAJOR) { @@ -482,9 +529,8 @@ class StencilIterator { if(_region_bound == 0) { _coords = set_coords(_idx); } else { - const auto& bnd_elements = _spec_views->boundary_elements(); if(_idx < _region_bound) { - const auto& region = bnd_elements[_region_number]; + const auto& region = _boundary_views[_region_number]; if(MemoryArrange == ROW_MAJOR) { for(dim_t d = NumDimensions; d > 0;) { @@ -492,30 +538,36 @@ class StencilIterator { if(_coords[d] < region.extent(d) + region.offset(d) - 1) { ++_coords[d]; break; - } else + } else { _coords[d] = region.offset(d); + } } } else { for(dim_t d = 0; d < NumDimensions; ++d) { if(_coords[d] < region.extent(d) + region.offset(d) - 1) { ++_coords[d]; break; - } else + } else { _coords[d] = region.offset(d); + } } } } else { - ++_region_number; - if(_region_number < bnd_elements.size()) { - _region_bound += bnd_elements[_region_number].size(); - _coords = _local_layout.coords(0, bnd_elements[_region_number]); - } + do { + ++_region_number; + if(_region_number >= _boundary_views.size()) + return; + + _region_bound += _boundary_views[_region_number].size(); + } while (_idx >= _region_bound); + _coords = _local_layout.coords(0, _boundary_views[_region_number]); } } } else { _coords = set_coords(_idx); } + // setup center point offset if(MemoryArrange == ROW_MAJOR) { _offset = _coords[0]; for(dim_t d = 1; d < NumDimensions; ++d) @@ -528,6 +580,8 @@ class StencilIterator { } } _current_lmemory_addr = _local_memory + _offset; + + //setup stencil point offsets if(Scope == StencilViewScope::INNER) { for(auto i = 0; i < NumStencilPoints; ++i) _stencil_mem_ptr[i] = _current_lmemory_addr + (*_stencil_offsets)[i]; @@ -537,7 +591,7 @@ class StencilIterator { std::array is_halo{}; std::array indexes{}; for(auto d = 0; d < NumDimensions; ++d) { - auto extent = _spec_views->local().extent(d); + auto extent = _local_layout.extent(d); for(auto i = 0; i < NumStencilPoints; ++i) { auto& halo_coord = halo_coords[i][d]; @@ -557,6 +611,7 @@ class 
StencilIterator { is_halo[i] = true; } } + for(auto i = 0; i < NumStencilPoints; ++i) { if(is_halo[i]) _stencil_mem_ptr[i] = value_halo_at(indexes[i], halo_coords[i]); @@ -566,10 +621,10 @@ class StencilIterator { } } - std::array set_coords(pattern_index_t idx) { + ElementCoords_t set_coords(pattern_index_t idx) { if(Scope == StencilViewScope::BOUNDARY) { auto local_idx = idx; - for(const auto& region : _spec_views->boundary_elements()) { + for(const auto& region : _boundary_views) { _region_bound += region.size(); if(local_idx < region.size()) { return _local_layout.coords(local_idx, region); @@ -580,10 +635,10 @@ class StencilIterator { DASH_ASSERT("idx >= size not implemented yet"); return std::array{}; } else { - if(_view_local.size() == 0) + if(_view.size() == 0) return std::array{}; else - return _local_layout.coords(idx, _view_local); + return _local_layout.coords(idx, _view); } } @@ -617,9 +672,9 @@ class StencilIterator { HaloMemory_t* _halomemory; const StencilSpecT* _stencil_spec; const StencilOffsets_t* _stencil_offsets; - const StencilSpecViews_t* _spec_views; + const ViewSpec_t _view; + const BoundaryViews_t _boundary_views{}; ElementT* _local_memory; - ViewSpec_t _view_local; std::array _stencil_mem_ptr; const LocalLayout_t _local_layout; pattern_index_t _idx{ 0 }; diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index 573046f29..b281b1ef9 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -258,7 +258,6 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) auto* sum_local = sum_halo.lbegin(); auto stencil_op = halo_wrapper.stencil_operator(stencil_spec); - auto it_iend = stencil_op.iend(); for(auto it = stencil_op.ibegin(); it != it_iend; ++it) { for(auto i = 0; i < stencil_spec.num_stencil_points(); ++i) @@ -314,7 +313,7 @@ long calc_sum_check(long*** matrix, T begin, T end) { } template -unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op) { +unsigned long 
calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, bool region_wise = false) { auto& stencil_spec = stencil_op.stencil_spec(); auto num_stencil_points = stencil_spec.num_stencil_points(); @@ -334,14 +333,32 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op) { halo_wrapper.wait(); - auto it_bend = stencil_op.bend(); - for(auto it = stencil_op.bbegin(); it != it_bend; ++it) { - for(auto i = 0; i < num_stencil_points; ++i) - *sum_local += it.value_at(i); + if(region_wise) { + for( auto d = 0; d < 3; ++d) { + auto it_bnd = stencil_op.boundary_iterator_at(d, RegionPos::PRE); + for(auto it = it_bnd.first; it != it_bnd.second; ++it) { + for(auto i = 0; i < num_stencil_points; ++i) + *sum_local += it.value_at(i); - *sum_local += *it; - } + *sum_local += *it; + } + auto it_bnd_2 = stencil_op.boundary_iterator_at(d, RegionPos::POST); + for(auto it = it_bnd_2.first; it != it_bnd_2.second; ++it) { + for(auto i = 0; i < num_stencil_points; ++i) + *sum_local += it.value_at(i); + *sum_local += *it; + } + } + } else { + auto it_bend = stencil_op.bend(); + for(auto it = stencil_op.bbegin(); it != it_bend; ++it) { + for(auto i = 0; i < num_stencil_points; ++i) + *sum_local += it.value_at(i); + + *sum_local += *it; + } + } sum_halo.barrier(); unsigned long sum = 0; @@ -501,7 +518,6 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic3D) StencilP_t( 1, 1,-1), StencilP_t( 1, 1, 0), StencilP_t( 1, 1, 1) ); - GlobBoundSpec_t bound_spec; HaloMatrixWrapper halo_wrapper(matrix_halo, stencil_spec); auto stencil_op = halo_wrapper.stencil_operator(stencil_spec); auto sum_halo = calc_sum_halo(halo_wrapper, stencil_op); @@ -698,6 +714,7 @@ TEST_F(HaloTest, HaloMatrixWrapperCustom3D) StencilP_t( 1, 0,-1), StencilP_t( 1, 0, 0), StencilP_t( 1, 0, 1), StencilP_t( 1, 1,-1), StencilP_t( 1, 1, 0), StencilP_t( 1, 1, 1) ); + GlobBoundSpec_t bound_spec(BoundaryProp::CUSTOM, BoundaryProp::CUSTOM, BoundaryProp::CUSTOM); HaloMatrixWrapper halo_wrapper(matrix_halo, 
bound_spec, stencil_spec); @@ -950,6 +967,7 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMix3D) auto stencil_op = halo_wrapper.stencil_operator(stencil_spec); auto sum_halo = calc_sum_halo(halo_wrapper, stencil_op); + auto sum_halo_region = calc_sum_halo(halo_wrapper, stencil_op, true); auto sum_halo_via_stencil = calc_sum_halo_via_stencil(halo_wrapper, stencil_op); halo_wrapper_col.set_custom_halos([](const std::array& coords) { @@ -961,6 +979,7 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMix3D) if(myid == 0) { EXPECT_EQ(sum_check, sum_halo); + EXPECT_EQ(sum_check, sum_halo_region); EXPECT_EQ(sum_check, sum_halo_via_stencil); EXPECT_EQ(sum_check, sum_halo_col); } @@ -1253,10 +1272,16 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMultiStencil) auto sum_halo_spec_2 = calc_sum_halo(halo_wrapper, stencil_op_2); auto sum_halo_spec_3 = calc_sum_halo(halo_wrapper, stencil_op_3); + auto sum_halo_spec_1_region = calc_sum_halo(halo_wrapper, stencil_op_1, true); + auto sum_halo_spec_2_region = calc_sum_halo(halo_wrapper, stencil_op_2, true); + auto sum_halo_spec_3_region = calc_sum_halo(halo_wrapper, stencil_op_3, true); if(myid == 0) { EXPECT_EQ(sum_check_spec_1, sum_halo_spec_1); EXPECT_EQ(sum_check_spec_2, sum_halo_spec_2); EXPECT_EQ(sum_check_spec_3, sum_halo_spec_3); + EXPECT_EQ(sum_check_spec_1, sum_halo_spec_1_region); + EXPECT_EQ(sum_check_spec_2, sum_halo_spec_2_region); + EXPECT_EQ(sum_check_spec_3, sum_halo_spec_3_region); } dash::Team::All().barrier(); From 7691d0d679ff1313c40ec1e7b5d29308c01c8f87 Mon Sep 17 00:00:00 2001 From: Denis Huenich Date: Thu, 19 Apr 2018 14:07:09 +0200 Subject: [PATCH 3/6] inner and boundary proxy classes for StencilOperator, more dokumentation --- .../ex.02.matrix.halo.heat_equation/main.cpp | 8 +- dash/examples/ex.11.halo-stencil/main.cpp | 4 +- dash/include/dash/halo/Halo.h | 185 +++---- dash/include/dash/halo/HaloMatrixWrapper.h | 20 +- dash/include/dash/halo/StencilOperator.h | 453 +++++++++++------- .../dash/halo/iterator/StencilIterator.h | 179 
++++--- dash/test/halo/HaloTest.cc | 30 +- 7 files changed, 520 insertions(+), 359 deletions(-) diff --git a/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp b/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp index b91622a16..2cfa618fc 100644 --- a/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp +++ b/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp @@ -143,8 +143,8 @@ int main(int argc, char *argv[]) } #endif // slow version - auto it_end = current_op->iend(); - for(auto it = current_op->ibegin(); it != it_end; ++it) + auto it_end = current_op->inner.end(); + for(auto it = current_op->inner.begin(); it != it_end; ++it) { auto core = *it; auto dtheta = (it.value_at(0) + it.value_at(1) - 2 * core) / (dx * dx) + @@ -156,8 +156,8 @@ int main(int argc, char *argv[]) current_halo->wait(); // Calculation of boundary Halo elements - auto it_bend = current_op->bend(); - for (auto it = current_op->bbegin(); it != it_bend; ++it) { + auto it_bend = current_op->boundary.end(); + for (auto it = current_op->boundary.begin(); it != it_bend; ++it) { auto core = *it; double dtheta = (it.value_at(0) + it.value_at(1) - 2 * core) / (dx * dx) + diff --git a/dash/examples/ex.11.halo-stencil/main.cpp b/dash/examples/ex.11.halo-stencil/main.cpp index 2c8234c98..75b5b9910 100644 --- a/dash/examples/ex.11.halo-stencil/main.cpp +++ b/dash/examples/ex.11.halo-stencil/main.cpp @@ -151,8 +151,8 @@ void smooth(HaloWrapper_t & halo_old, HaloWrapper_t & halo_new, halo_old.wait(); // Calculation of boundary Halo elements - auto bend = op_old.bend(); - for(auto it = op_old.bbegin(); it != bend; ++it) + auto bend = op_old.boundary.end(); + for(auto it = op_old.boundary.begin(); it != bend; ++it) { auto core = *it; *(nlptr+it.lpos()) = (0.40 * core) + diff --git a/dash/include/dash/halo/Halo.h b/dash/include/dash/halo/Halo.h index e1143e1a2..cfd0761ab 100644 --- a/dash/include/dash/halo/Halo.h +++ b/dash/include/dash/halo/Halo.h @@ -61,9 +61,9 @@ class StencilPoint : public 
Dimensional { constexpr StencilPoint( typename std::enable_if::type coefficient, - point_value_t value, Values... values) - : Base_t::Dimensional(value, (point_value_t) values...), _coefficient(coefficient) { - } + point_value_t value, Values... values) + : Base_t::Dimensional(value, (point_value_t) values...), + _coefficient(coefficient) {} // TODO as constexpr /** @@ -72,7 +72,7 @@ class StencilPoint : public Dimensional { int max() const { int max = 0; for(dim_t i(0); i < NumDimensions; ++i) - max = std::max( max, (int) std::abs(this->_values[i])); + max = std::max(max, (int) std::abs(this->_values[i])); return max; } @@ -86,12 +86,12 @@ class StencilPoint : public Dimensional { }; // StencilPoint template -std::ostream& operator<<( std::ostream & os, - const StencilPoint& stencil_point) { +std::ostream& operator<<( + std::ostream& os, const StencilPoint& stencil_point) { os << "dash::halo::StencilPoint<" << NumDimensions << ">" << "(coefficient = " << stencil_point.coefficient << " - points: "; - for (auto d = 0; d < NumDimensions; ++d) { - if (d > 0) { + for(auto d = 0; d < NumDimensions; ++d) { + if(d > 0) { os << ","; } os << stencil_point[d]; @@ -113,11 +113,11 @@ class StencilSpec { static constexpr auto NumDimensions = StencilPointT::ndim(); public: - using stencil_size_t = std::size_t; - using stencil_index_t = std::size_t; - using StencilArray_t = std::array; - using StencilPoint_t = StencilPointT; - using point_value_t = typename StencilPoint_t::point_value_t; + using stencil_size_t = std::size_t; + using stencil_index_t = std::size_t; + using StencilArray_t = std::array; + using StencilPoint_t = StencilPointT; + using point_value_t = typename StencilPoint_t::point_value_t; using MaxDistanceDim_t = std::pair; using MaxDistanceAll_t = std::array; @@ -177,6 +177,10 @@ class StencilSpec { return std::make_pair(0, false); } + /** + * Returns the minimal and maximal distances of all stencil points for all + * dimensions. 
+ */ MaxDistanceAll_t minmax_distances() const { MaxDistanceAll_t max_dist{}; for(const auto& stencil_point : _specs) { @@ -193,6 +197,10 @@ class StencilSpec { return max_dist; } + /** + * Returns the minimal and maximal distances of all stencil points for the + * given dimension. + */ MaxDistanceDim_t minmax_distances(dim_t dim) const { MaxDistanceDim_t max_dist{}; for(const auto& stencil_point : _specs) { @@ -218,12 +226,12 @@ class StencilSpec { }; // StencilSpec template -std::ostream& operator<<(std::ostream & os, - const StencilSpec & specs) { +std::ostream& operator<<( + std::ostream& os, const StencilSpec& specs) { os << "dash::halo::StencilSpec<" << NumStencilPoints << ">" << "("; - for (auto i = 0; i < NumStencilPoints; ++i) { - if (i > 0) { + for(auto i = 0; i < NumStencilPoints; ++i) { + if(i > 0) { os << ","; } os << specs[i]; @@ -245,7 +253,7 @@ enum class BoundaryProp : uint8_t { CUSTOM }; -static std::ostream& operator<<( std::ostream & os, const BoundaryProp& prop) { +static std::ostream& operator<<(std::ostream& os, const BoundaryProp& prop) { if(prop == BoundaryProp::NONE) os << "NONE"; else if(prop == BoundaryProp::CYCLIC) @@ -285,12 +293,12 @@ class GlobalBoundarySpec : public Dimensional { }; // GlobalBoundarySpec template -std::ostream& operator<<( std::ostream & os, - const GlobalBoundarySpec& spec) { +std::ostream& operator<<(std::ostream& os, + const GlobalBoundarySpec& spec) { os << "dash::halo::GlobalBoundarySpec<" << NumDimensions << ">" << "("; - for (auto d = 0; d < NumDimensions; ++d) { - if (d > 0) { + for(auto d = 0; d < NumDimensions; ++d) { + if(d > 0) { os << ","; } os << spec[d]; @@ -304,11 +312,13 @@ std::ostream& operator<<( std::ostream & os, * Position of a \ref Region in one dimension relating to the center */ enum class RegionPos : bool { + /// Region before center PRE, + /// Region behind center POST }; -static std::ostream& operator<<( std::ostream & os, const RegionPos& pos) { +static std::ostream& 
operator<<(std::ostream& os, const RegionPos& pos) { if(pos == RegionPos::PRE) os << "PRE"; else @@ -403,7 +413,7 @@ class RegionCoords : public Dimensional { if(dim == d) index = coord + index * REGION_INDEX_BASE; else - index= 1 + index * REGION_INDEX_BASE; + index = 1 + index * REGION_INDEX_BASE; return index; } @@ -577,7 +587,7 @@ class RegionSpec : public Dimensional { template std::ostream& operator<<(std::ostream& os, const RegionSpec& rs) { - os << "dash::RegionSpec<" << NumDimensions << ">(" << (uint32_t) rs[0]; + os << "dash::halo::RegionSpec<" << NumDimensions << ">(" << (uint32_t) rs[0]; for(auto i = 1; i < NumDimensions; ++i) os << "," << (uint32_t) rs[i]; os << "), Extent:" << rs.extent(); @@ -678,7 +688,7 @@ class HaloSpec { if(_specs[index].extent() == 0) ++_num_regions; - auto max = stencil.max(); + auto max = stencil.max(); if(max > _specs[index].extent()) _specs[index] = RegionSpec_t(index, max); } @@ -709,25 +719,23 @@ class HaloSpec { }; // HaloSpec template -std::ostream& operator<<( std::ostream & os, const HaloSpec& hs) -{ - std::ostringstream ss; - ss << "dash::halo::HaloSpec<" << NumDimensions << ">("; +std::ostream& operator<<(std::ostream& os, const HaloSpec& hs) { + os << "dash::halo::HaloSpec<" << NumDimensions << ">("; bool begin = true; for(const auto& region_spec : hs.specs()) { if(region_spec.extent() > 0) { if(begin) { - ss << region_spec; + os << region_spec; begin = false; } else { - ss << "," << region_spec; + os << "," << region_spec; } } } - ss << "; number region: " << hs.num_regions(); - ss << ")"; + os << "; number region: " << hs.num_regions(); + os << ")"; - return operator<<(os, ss.str()); + return os; } /** @@ -875,7 +883,7 @@ class RegionIter { return _pattern->local_index(glob_coords(_idx)); } - const ViewSpec_t viewspec() const { return _region_view; } + const ViewSpec_t view() const { return _region_view; } inline bool is_relative() const noexcept { return true; } @@ -1020,13 +1028,11 @@ template std::ostream& 
operator<<( std::ostream& os, - const RegionIter& i) { - std::ostringstream ss; - dash::GlobPtr ptr(i); - ss << "dash::HaloBlockIter<" << typeid(ElementT).name() << ">(" - << "idx:" << i._idx << ", " - << "gptr:" << ptr << ")"; - return operator<<(os, ss.str()); + const RegionIter& it) { + os << "dash::halo::RegionIter<" << typeid(ElementT).name() << ">(" + << "; idx: " << it.rpos() << "; view: " << it.view() << ")"; + + return os; } template -std::ostream& operator<<( - std::ostream & os, - const Region & region) { - std::ostringstream ss; - ss << "dash::halo::Region<" << typeid(ElementT).name() << ">" - //<< "( view: " << region.view() - //<< "; region spec: " << region.spec() - //<< "; global borders: " << region.border() - //<< "; border region: " << region.is_border_region() - //<< "; custom region: " << region.is_custom_region() - //<< "; begin iterator: " << region.begin() - //<< "; end iterator: " << region.begin() - << ")"; - - return operator<<(os, ss.str()); -} +std::ostream& operator<<(std::ostream& os, + const Region& region) { + os << "dash::halo::Region<" << typeid(ElementT).name() << ">" + << "( view: " << region.view() << "; region spec: " << region.spec() + << "; border regions: {"; + const auto& border = region.border(); + for(auto d = 0; d < border.size(); ++d) { + if(d == 0) + os << border[d]; + else + os << "," << border[d]; + } + os << "}" + << "; is border: " << region.is_border_region() + << "; is custom: " << region.is_custom_region() + << "; begin iterator: " << region.begin() + << "; end iterator: " << region.begin() << ")"; + return os; +} /** * Takes the local part of the NArray and builds halo and @@ -1163,7 +1172,6 @@ class HaloBlock { const GlobBoundSpec_t& bound_spec = GlobBoundSpec_t{}) : _globmem(globmem), _pattern(pattern), _view(view), _halo_reg_spec(halo_reg_spec), _view_local(_view.extents()) { - // setup local views _view_inner = _view_local; _view_inner_with_boundaries = _view_local; @@ -1267,13 +1275,14 @@ class HaloBlock 
{ */ for(dim_t d = 0; d < NumDimensions; ++d) { const auto global_offset = view.offset(d); - const auto view_extent = _view_local.extent(d); + const auto view_extent = _view_local.extent(d); auto bnd_elem_offsets = _view.offsets(); auto bnd_elem_extents = _view_local.extents(); bnd_elem_extents[d] = _halo_extents_max[d].first; for(auto d_tmp = 0; d_tmp < d; ++d_tmp) { - bnd_elem_offsets[d_tmp] -= _view.offset(d_tmp) - _halo_extents_max[d_tmp].first; + bnd_elem_offsets[d_tmp] -= + _view.offset(d_tmp) - _halo_extents_max[d_tmp].first; bnd_elem_extents[d_tmp] -= _halo_extents_max[d_tmp].first + _halo_extents_max[d_tmp].second; } @@ -1281,10 +1290,10 @@ class HaloBlock { _view_inner.resize_dim( d, _halo_extents_max[d].first, view_extent - _halo_extents_max[d].first - _halo_extents_max[d].second); - if(bound_spec[d] == BoundaryProp::NONE ) { + if(bound_spec[d] == BoundaryProp::NONE) { auto safe_offset = global_offset; auto safe_extent = view_extent; - if(global_offset < _halo_extents_max[d].first) { + if(global_offset < _halo_extents_max[d].first) { safe_offset = _halo_extents_max[d].first; safe_extent -= _halo_extents_max[d].first - global_offset; } else { @@ -1292,7 +1301,8 @@ class HaloBlock { push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, bound_spec); } - auto check_extent = global_offset + view_extent + _halo_extents_max[d].second; + auto check_extent = + global_offset + view_extent + _halo_extents_max[d].second; if(check_extent > _pattern.extent(d)) { safe_extent -= check_extent - _pattern.extent(d); } else { @@ -1301,7 +1311,8 @@ class HaloBlock { push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, bound_spec); } - _view_inner_with_boundaries.resize_dim(d, safe_offset - global_offset, safe_extent); + _view_inner_with_boundaries.resize_dim(d, safe_offset - global_offset, + safe_extent); } else { bnd_elem_offsets[d] -= global_offset; push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, @@ -1406,9 
+1417,7 @@ class HaloBlock { * Returns a set of local views that contains all boundary elements. * No duplicates of elements included. */ - const BoundaryViews_t& boundary_views() const { - return _boundary_views; - } + const BoundaryViews_t& boundary_views() const { return _boundary_views; } /** * Number of halo elements @@ -1458,7 +1467,7 @@ class HaloBlock { if(bound_spec[d_tmp] == BoundaryProp::NONE) { if(offsets[d_tmp] < halo_exts_max[d_tmp].first) { offsets[d_tmp] = halo_exts_max[d_tmp].first; - tmp[d_tmp] = halo_exts_max[d_tmp].first; + tmp[d_tmp] = halo_exts_max[d_tmp].first; extents[d_tmp] -= halo_exts_max[d_tmp].first; } auto check_extent_tmp = @@ -1508,13 +1517,10 @@ class HaloBlock { }; // class HaloBlock template -std::ostream& operator<<( - std::ostream & os, - const HaloBlock& haloblock) -{ - std::ostringstream ss; +std::ostream& operator<<(std::ostream& os, + const HaloBlock& haloblock) { bool begin = true; - ss << "dash::halo::HaloBlock<" << typeid(ElementT).name() << ">(" + os << "dash::halo::HaloBlock<" << typeid(ElementT).name() << ">(" << "view global: " << haloblock.view() << "; halo spec: " << haloblock.halo_spec() << "; view local: " << haloblock.view_local() @@ -1523,32 +1529,29 @@ std::ostream& operator<<( << "; halo regions { "; for(const auto& region : haloblock.halo_regions()) { if(begin) { - ss << region; + os << region; begin = false; } else { - ss << "," << region; + os << "," << region; } } - ss << " } " - << "; halo elems: " << haloblock.halo_size() - << "; boundary regions: { "; + os << " } " + << "; halo elems: " << haloblock.halo_size() << "; boundary regions: { "; for(const auto& region : haloblock.boundary_regions()) { if(begin) { - ss << region; + os << region; begin = false; } else { - ss << "," << region; + os << "," << region; } } - ss << " } " + os << " } " << "; boundary views: " << haloblock.boundary_views() - << "; boundary elems: " << haloblock.boundary_size() - << ")"; + << "; boundary elems: " << 
haloblock.boundary_size() << ")"; - return operator<<(os, ss.str()); + return os; } - /** * Mangages the memory for all halo regions provided by the given * \ref HaloBlock @@ -1591,7 +1594,9 @@ class HaloMemory { * \return Pointer to the first halo element or nullptr if the * region doesn't exist */ - Element_t* first_element_at(region_index_t index) { return _halo_offsets[index]; } + Element_t* first_element_at(region_index_t index) { + return _halo_offsets[index]; + } /** * Pointer to the first halo element @@ -1662,7 +1667,7 @@ class HaloMemory { off = off * extents[d] + coords[d]; } else { off = coords[NumDimensions - 1]; - for(dim_t d = NumDimensions - 1; d > 0; ) { + for(dim_t d = NumDimensions - 1; d > 0;) { --d; off = off * extents[d] + coords[d]; } diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index e60a1df84..0f993e98d 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -84,8 +84,7 @@ class HaloMatrixWrapper { _view_global(matrix.local.offsets(), matrix.local.extents()), _haloblock(matrix.begin().globmem(), matrix.pattern(), _view_global, _halo_spec, cycle_spec), - _view_local(_haloblock.view_local()), - _halomemory(_haloblock) { + _view_local(_haloblock.view_local()), _halomemory(_haloblock) { for(const auto& region : _haloblock.halo_regions()) { if(region.size() == 0) continue; @@ -94,8 +93,8 @@ class HaloMatrixWrapper { pattern_size_t num_elems_block = 1; auto rel_dim = region.spec().relevant_dim(); auto level = region.spec().level(); - auto* off = _halomemory.first_element_at(region.index()); - auto it = region.begin(); + auto* off = _halomemory.first_element_at(region.index()); + auto it = region.begin(); if(MemoryArrange == ROW_MAJOR) { if(level == 1) { //|| (level == 2 && region.regionSpec()[0] != 1)) { @@ -176,7 +175,7 @@ class HaloMatrixWrapper { pattern_size_t stride = (num_blocks > 1) ? 
std::abs(it_dist.lpos().index - it.lpos().index) : 1; - auto ds_stride = dart_storage(stride); + auto ds_stride = dart_storage(stride); dart_datatype_t stride_type; dart_type_create_strided(ds_num_elems_block.dtype, ds_stride.nelem, @@ -444,9 +443,10 @@ class HaloMatrixWrapper { StencilOperator stencil_operator( const StencilSpecT& stencil_spec) { for(const auto& stencil : stencil_spec.specs()) { - DASH_ASSERT_MSG(stencil.max() - <= _halo_spec.extent(RegionSpec::index(stencil)), - "Stencil point extent higher than halo region extent."); + DASH_ASSERT_MSG( + stencil.max() + <= _halo_spec.extent(RegionSpec::index(stencil)), + "Stencil point extent higher than halo region extent."); } return StencilOperator( @@ -455,9 +455,9 @@ class HaloMatrixWrapper { private: struct Data { - const Region_t& region; + const Region_t& region; std::function get_halos; - dart_handle_t handle = DART_HANDLE_NULL; + dart_handle_t handle = DART_HANDLE_NULL; }; void update_halo_intern(Data& data) { diff --git a/dash/include/dash/halo/StencilOperator.h b/dash/include/dash/halo/StencilOperator.h index 23e871be1..75610f7ca 100644 --- a/dash/include/dash/halo/StencilOperator.h +++ b/dash/include/dash/halo/StencilOperator.h @@ -7,6 +7,218 @@ namespace dash { namespace halo { +// Forward declaration +template +class StencilOperator; + +/** + * Proxy StencilOperator for inner elements only + */ +template +class StencilOperatorInner { +private: + static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); + static constexpr auto NumDimensions = PatternT::ndim(); + + using StencilOperator_t = StencilOperator; + +public: + using ViewSpec_t = typename StencilOperator_t::ViewSpec_t; + using ElementCoords_t = typename StencilOperator_t::ElementCoords_t; + using iterator = typename StencilOperator_t::iterator_inner; + using const_iterator = const iterator; + +public: + StencilOperatorInner(const StencilOperator_t* stencil_op) + : _stencil_op(stencil_op) {} + + /** + * Returns the begin 
iterator for all inner elements + */ + iterator begin() noexcept { return _stencil_op->_ibegin; } + + /** + * Returns the begin const iterator for all inner elements + */ + const_iterator begin() const noexcept { return _stencil_op->_ibegin; } + + /** + * Returns the end iterator for all inner elements + */ + iterator end() noexcept { return _stencil_op->_iend; } + + /** + * Returns the end const iterator for all inner elements + */ + const_iterator end() const noexcept { return _stencil_op->_iend; } + + /** + * Returns a view for all inner elements + */ + const ViewSpec_t& view() const { return _stencil_op->_spec_views.inner(); } + + /** + * Modifies all stencil point elements and the center within the inner view. + * The stencil points are multiplied with their coefficent (\ref StencilPoint) + * and the center is multiplied with the given center coefficient. The + * results then modifies the center/stencil point elements via the given + * operation. + * + * \param coords center coordinate + * \param value base value for all points + * \param coefficient for center + * \param op operation to use (e.g. std::plus). 
default: replace + */ + void set_values_at( + const ElementCoords_t& coords, ElementT value, ElementT coefficient_center, + std::function op = + [](const ElementT& lhs, const ElementT& rhs) { return rhs; }) { + auto* center = _stencil_op->_local_memory + _stencil_op->get_offset(coords); + + *center = op(*center, coefficient_center * value); + for(auto i = 0; i < NumStencilPoints; ++i) { + auto& stencil_point_value = center[_stencil_op->_stencil_offsets[i]]; + stencil_point_value = + op(stencil_point_value, + _stencil_op->_stencil_spec[i].coefficient() * value); + } + } + +private: + const StencilOperator_t* _stencil_op; +}; + +/** + * Proxy StencilOperator for boundary elements only + */ +template +class StencilOperatorBoundary { +private: + static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); + static constexpr auto NumDimensions = PatternT::ndim(); + + using StencilOperator_t = StencilOperator; + using pattern_size_t = typename StencilOperator_t::pattern_size_t; + using StencilSpecViews_t = typename StencilOperator_t::StencilSpecViews_t; + +public: + using ViewSpec_t = typename StencilOperator_t::ViewSpec_t; + using ElementCoords_t = typename StencilOperator_t::ElementCoords_t; + using iterator = typename StencilOperator_t::iterator_bnd; + using const_iterator = const iterator; + using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; + +public: + StencilOperatorBoundary(const StencilOperator_t* stencil_op) + : _stencil_op(stencil_op) {} + + /** + * Returns the begin iterator for all boundary elements + */ + iterator begin() noexcept { return _stencil_op->_bbegin; } + + /** + * Returns the begin const iterator for all boundary elements + */ + const_iterator begin() const noexcept { return _stencil_op->_bbegin; } + + /** + * Returns the end iterator for all boundary elements + */ + iterator end() noexcept { return _stencil_op->_bend; } + + /** + * Returns the end const iterator for all boundary elements + */ + const_iterator 
end() const noexcept { return _stencil_op->_bend; } + + /** + * Returns all boundary views including all boundary elements (no dublicates) + * Unlike the view methods in \ref StencilOperator and + * \ref StencilOperatorInner this method returns a container with + * views. + */ + const BoundaryViews_t& view() const { + return _stencil_op->spec_views.boundary_views(); + } + + /** + * Returns the number of all boundary elements (no dublicates) + */ + pattern_size_t boundary_size() const { + return _stencil_op->spec_views.boundary_size(); + } + + /** + * Modifies all stencil point elements and the center wth halo check. + * If a stencil point points to a halo element or a non existing element + * no operation is performed for this one. + * The stencil points are multiplied with their coefficent (\ref StencilPoint) + * and the center is multiplied with the given center coefficient. The + * results then modifies the center/stencil point elements via the given + * operation. + * + * \param coords center coordinate + * \param value base value for all points + * \param coefficient for center + * \param op operation to use (e.g. std::plus). 
default: replace + */ + void set_values_at( + const ElementCoords_t& coords, ElementT value, ElementT coefficient_center, + std::function op = + [](const ElementT& lhs, const ElementT& rhs) { return rhs; }) { + auto* center = _stencil_op->_local_memory + _stencil_op->get_offset(coords); + + *center = op(*center, coefficient_center * value); + for(auto i = 0; i < NumStencilPoints; ++i) { + bool halo = false; + for(auto d = 0; d < NumDimensions; ++d) { + auto coord_value = coords[d] + _stencil_op->_stencil_spec[i][d]; + if(coord_value < 0 + || coord_value >= _stencil_op->_view_local->extent(d)) { + halo = true; + break; + } + } + + if(halo) + continue; + + auto& stencil_point_value = center[_stencil_op->_stencil_offsets[i]]; + stencil_point_value = + op(stencil_point_value, + _stencil_op->_stencil_spec[i].coefficient() * value); + } + } + + /** + * Returns a pair of boundary iterators (first: begin, second: end) for a + * given dimension and \ref RegionPos. Iterators of higher dimensions will + * not iterate over elements which were already covered in lower dimensions. + * Using all iterators for all dimensions and \ref RegionPos has the same + * effect as using bbegin and bend. 
+ */ + std::pair iterator_at(dim_t dim, RegionPos pos) { + DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to great"); + const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); + pattern_size_t offset = 0; + auto it_views = std::begin(bnd_views); + for(dim_t d = 0; d < dim; ++d, ++it_views) + offset += it_views->size() + (++it_views)->size(); + + if(pos == RegionPos::POST) { + offset += it_views->size(); + ++it_views; + } + + auto it_begin = _stencil_op->_bbegin + offset; + + return std::make_pair(it_begin, it_begin + it_views->size()); + } + +private: + const StencilOperator_t* _stencil_op; +}; /** * The StencilOperator provides stencil specific iterator and functions for @@ -51,16 +263,25 @@ class StencilOperator { using signed_pattern_size_t = typename std::make_signed::type; using pattern_index_t = typename PatternT::index_type; + template + friend class StencilOperatorInner; + + template + friend class StencilOperatorBoundary; + public: - using iterator = StencilIterator; + using Inner_t = StencilOperatorInner; + using Bnd_t = StencilOperatorBoundary; + + using iterator = + StencilIterator; using const_iterator = const iterator; - using iterator_inner = StencilIterator; - using const_iterator_inner = const iterator_inner; - using iterator_bnd = StencilIterator; - using const_iterator_bnd = const iterator_bnd; + using iterator_inner = + StencilIterator; + using const_iterator_inner = const iterator; + using iterator_bnd = StencilIterator; + using const_iterator_bnd = const iterator; using StencilOffsets_t = typename iterator::StencilOffsets_t; using HaloBlock_t = HaloBlock; @@ -68,9 +289,9 @@ class StencilOperator { using ViewSpec_t = ViewSpec; using ElementCoords_t = std::array; - using StencilSpecViews_t = StencilSpecificViews; + using StencilSpecViews_t = StencilSpecificViews; - using region_index_t = typename RegionSpec::region_index_t; + using region_index_t = typename RegionSpec::region_index_t; public: /** @@ -78,100 +299,50 @@ class 
StencilOperator { * a \ref StencilSpec and a local \ref ViewSpec */ StencilOperator(const HaloBlock_t* haloblock, HaloMemory_t* halomemory, - const StencilSpecT& stencil_spec, - const ViewSpec_t* view_local) - : _halo_block(haloblock), _halo_memory(halomemory), - _stencil_spec(stencil_spec), _view_local(view_local), - _stencil_offsets(set_stencil_offsets()), + const StencilSpecT& stencil_spec, + const ViewSpec_t* view_local) + : inner(this), boundary(this), _halo_block(haloblock), + _halo_memory(halomemory), _stencil_spec(stencil_spec), + _view_local(view_local), _stencil_offsets(set_stencil_offsets()), _local_memory((ElementT*) _halo_block->globmem().lbegin()), _spec_views(*_halo_block, _stencil_spec, _view_local), _begin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, - _spec_views.inner_with_boundaries(), *_view_local, 0), + *_view_local, _spec_views.inner_with_boundaries(), 0), _end(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, - _spec_views.inner_with_boundaries(), *_view_local, - _spec_views.inner_with_boundaries().size()), + *_view_local, _spec_views.inner_with_boundaries(), + _spec_views.inner_with_boundaries().size()), _ibegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, - _spec_views.inner(), *_view_local, 0), + *_view_local, _spec_views.inner(), 0), _iend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, - _spec_views.inner(), *_view_local, _spec_views.inner().size()), - _bbegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), 0), - _bend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), + *_view_local, _spec_views.inner(), _spec_views.inner().size()), + _bbegin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, + *_view_local, _spec_views.boundary_views(), 0), + _bend(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, + *_view_local, 
_spec_views.boundary_views(), _spec_views.boundary_size()) {} - StencilOperator() = delete; - - /// returns the begin iterator for all relevant elements (inner + boundary) + /** + * Returns the begin iterator for all relevant elements (inner + boundary) + */ iterator begin() noexcept { return _begin; } - /// returns the begin const iterator for all relevant elements - /// (inner + boundary) + /** + * Returns the begin const iterator for all relevant elements + * (inner + boundary) + */ const_iterator begin() const noexcept { return _begin; } - /// returns the end iterator for all relevant elements (inner + boundary) + /** + * Returns the end iterator for all relevant elements (inner + boundary) + */ iterator end() noexcept { return _end; } - /// returns the end const iterator for all relevant elements (inner + - /// boundary) + /** + * Returns the end const iterator for all relevant elements (inner + + * boundary) + */ const_iterator end() const noexcept { return _end; } - /// returns the begin iterator for all inner elements - iterator_inner ibegin() noexcept { return _ibegin; } - - /// returns the begin const iterator for all inner elements - const_iterator_inner ibegin() const noexcept { return _ibegin; } - - /// returns the end iterator for all inner elements - iterator_inner iend() noexcept { return _iend; } - - /// returns the end const iterator for all inner elements - const_iterator_inner iend() const noexcept { return _iend; } - - /// returns the begin iterator for all boundary elements - iterator_bnd bbegin() noexcept { return _bbegin; } - - /// returns the begin const iterator for all boundary elements - const_iterator_bnd bbegin() const noexcept { return _bbegin; } - - /// returns the end iterator for all boundary elements - iterator_bnd bend() noexcept { return _bend; } - - /// returns the end const iterator for all boundary elements - const_iterator_bnd bend() const noexcept { return _bend; } - - std::pair boundary_iterator_at(dim_t dim, RegionPos 
pos) { - DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to great"); - const auto& bnd_views = _spec_views.boundary_views(); - pattern_size_t offset = 0; - auto it_views = std::begin(bnd_views); - for(dim_t d = 0; d < dim; ++d, ++it_views) - offset += it_views->size() + (++it_views)->size(); - - if(pos == RegionPos::POST) { - offset += it_views->size(); - ++it_views; - } - - //iterator_bnd it_begin(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), offset); - //iterator_bnd it_end(_local_memory, _halo_memory, &_stencil_spec, &_stencil_offsets, *_view_local, _spec_views.boundary_views(), offset + it_views->size()); - auto it_begin = _bbegin + offset; - - return std::make_pair(it_begin, it_begin + it_views->size()); - } - - /* - /// returns the begin iterator for all boundary elements - iterator_bnd bbegin() noexcept { return _bbegin; } - - /// returns the begin const iterator for all boundary elements - const_iterator_bnd bbegin() const noexcept { return _bbegin; } - - /// returns the end iterator for all boundary elements - iterator_bnd bend() noexcept { return _bend; } - - /// returns the end const iterator for all boundary elements - const_iterator_bnd bend() const noexcept { return _bend; } -*/ - /** * Returns the \ref HaloBlock */ @@ -187,86 +358,17 @@ class StencilOperator { */ HaloMemory_t& halo_memory() { return *_halo_memory; } - const StencilSpecViews_t& spec_views() const { - return _spec_views; - } - - const ViewSpec_t& view_inner() const { - return _spec_views.inner(); - } - - const ViewSpec_t& view_inner_with_boundaries() const { - return _spec_views.inner_with_boundaries(); - } - - - /** - * Modifies all stencil point elements and the center within the inner view. - * The stencil points are multiplied with their coefficent (\ref StencilPoint) - * and the center is multiplied with the given center coefficient. 
The - * results then modifies the center/stencil point elements via the given - * operation. - * - * \param coords center coordinate - * \param value base value for all points - * \param coefficient for center - * \param op operation to use (e.g. std::plus). default: replace + * Returns the \ref StencilSpecificView */ - void set_value_at_inner_local( - const ElementCoords_t& coords, ElementT value, ElementT coefficient_center, - std::function op = - [](const ElementT& lhs, const ElementT& rhs) { return rhs; }) { - auto* center = _local_memory + get_offset(coords); - - *center = op(*center, coefficient_center * value); - for(auto i = 0; i < NumStencilPoints; ++i) { - auto& stencil_point_value = center[_stencil_offsets[i]]; - stencil_point_value = - op(stencil_point_value, _stencil_spec[i].coefficient() * value); - } - } + const StencilSpecViews_t& spec_views() const { return _spec_views; } /** - * Modifies all stencil point elements and the center wth halo check. - * If a stencil point points to a halo element or a non existing element - * no operation is performed for this one. - * The stencil points are multiplied with their coefficent (\ref StencilPoint) - * and the center is multiplied with the given center coefficient. The - * results then modifies the center/stencil point elements via the given - * operation. - * - * \param coords center coordinate - * \param value base value for all points - * \param coefficient for center - * \param op operation to use (e.g. std::plus). 
default: replace + * Returns a \ref SpecView including inner and boundary elements */ - void set_value_at_boundary_local( - const ElementCoords_t& coords, ElementT value, ElementT coefficient_center, - std::function op = - [](const ElementT& lhs, const ElementT& rhs) { return rhs; }) { - auto* center = _local_memory + get_offset(coords); - - *center = op(*center, coefficient_center * value); - for(auto i = 0; i < NumStencilPoints; ++i) { - bool halo = false; - for(auto d = 0; d < NumDimensions; ++d) { - auto coord_value = coords[d] + _stencil_spec[i][d]; - if(coord_value < 0 || coord_value >= _view_local->extent(d)) { - halo = true; - break; - } - } - - if(halo) - continue; - - auto& stencil_point_value = center[_stencil_offsets[i]]; - stencil_point_value = - op(stencil_point_value, _stencil_spec[i].coefficient() * value); - } - } + const ViewSpec_t& view() const { return _spec_views.inner_with_boundaries(); } + /* ElementT get_value_at_inner_local( const ElementCoords_t& coords, ElementT coefficient_center, std::function op = @@ -304,15 +406,16 @@ class StencilOperator { _halo_memory->to_halo_mem_coords(index, coords_stencil); - value = op(value, *( halomem_pos + _halo_memory->offset(index, coords_stencil))); - } else { - auto& stencil_point_value = center[_stencil_offsets[i]]; - value = op(value, _stencil_spec[i].coefficient() * stencil_point_value); + value = op(value, *( halomem_pos + _halo_memory->offset(index, + coords_stencil))); } else { auto& stencil_point_value = + center[_stencil_offsets[i]]; value = op(value, _stencil_spec[i].coefficient() + * stencil_point_value); } } return value; } + */ private: StencilOffsets_t set_stencil_offsets() { @@ -354,14 +457,18 @@ class StencilOperator { return offset; } +public: + Inner_t inner; + Bnd_t boundary; + private: - const HaloBlock_t* _halo_block; - HaloMemory_t* _halo_memory; - const StencilSpecT _stencil_spec; - const ViewSpec_t* _view_local; - StencilOffsets_t _stencil_offsets; - ElementT* _local_memory; - 
StencilSpecViews_t _spec_views; + const HaloBlock_t* _halo_block; + HaloMemory_t* _halo_memory; + const StencilSpecT _stencil_spec; + const ViewSpec_t* _view_local; + StencilOffsets_t _stencil_offsets; + ElementT* _local_memory; + StencilSpecViews_t _spec_views; iterator _begin; iterator _end; diff --git a/dash/include/dash/halo/iterator/StencilIterator.h b/dash/include/dash/halo/iterator/StencilIterator.h index 5422db8ad..f4e6d6aa3 100644 --- a/dash/include/dash/halo/iterator/StencilIterator.h +++ b/dash/include/dash/halo/iterator/StencilIterator.h @@ -11,59 +11,88 @@ namespace dash { namespace halo { -enum class StencilViewScope : std::uint8_t { INNER, BOUNDARY, ALL }; +/** + * View property of the StencilIterator + */ +enum class StencilViewScope : std::uint8_t { + /// inner elements only + INNER, + /// Boundary elements only + BOUNDARY, + /// Inner and boundary elements + ALL +}; + +static std::ostream& operator<<(std::ostream& os, + const StencilViewScope& scope) { + if(scope == StencilViewScope::INNER) + os << "INNER"; + else if(scope == StencilViewScope::BOUNDARY) + os << "BOUNDARY"; + else + os << "ALL"; + + return os; +} +/** + * Adapts all views \ref HaloBlock provides to the given \ref StencilSpec. 
+ */ template class StencilSpecificViews { private: static constexpr auto NumDimensions = HaloBlockT::ndim(); using Pattern_t = typename HaloBlockT::Pattern_t; + public: using ViewSpec_t = typename HaloBlockT::ViewSpec_t; using BoundaryViews_t = typename HaloBlockT::BoundaryViews_t; using pattern_size_t = typename Pattern_t::size_type; public: - StencilSpecificViews(const HaloBlockT& haloblock, - const StencilSpecT& stencil_spec, - const ViewSpec_t* view_local) + StencilSpecificViews(const HaloBlockT& haloblock, + const StencilSpecT& stencil_spec, + const ViewSpec_t* view_local) : _view_local(view_local) { auto minmax_dist = stencil_spec.minmax_distances(); for(auto& dist : minmax_dist) dist.first = std::abs(dist.first); - auto inner_off = haloblock.view_inner().offsets(); - auto inner_ext = haloblock.view_inner().extents(); + auto inner_off = haloblock.view_inner().offsets(); + auto inner_ext = haloblock.view_inner().extents(); auto inner_bound_off = haloblock.view_inner_with_boundaries().offsets(); auto inner_bound_ext = haloblock.view_inner_with_boundaries().extents(); for(auto d = 0; d < NumDimensions; ++d) { resize_offset(inner_off[d], inner_ext[d], minmax_dist[d].first); - resize_extent(inner_off[d], inner_ext[d],_view_local->extent(d), minmax_dist[d].second); - resize_offset(inner_bound_off[d], inner_bound_ext[d], minmax_dist[d].first); - resize_extent(inner_bound_off[d], inner_bound_ext[d],_view_local->extent(d), minmax_dist[d].second); + resize_extent(inner_off[d], inner_ext[d], _view_local->extent(d), + minmax_dist[d].second); + resize_offset(inner_bound_off[d], inner_bound_ext[d], + minmax_dist[d].first); + resize_extent(inner_bound_off[d], inner_bound_ext[d], + _view_local->extent(d), minmax_dist[d].second); } - _view_inner = ViewSpec_t(inner_off, inner_ext); + _view_inner = ViewSpec_t(inner_off, inner_ext); _view_inner_with_boundaries = ViewSpec_t(inner_bound_off, inner_bound_ext); using RegionCoords_t = RegionCoords; using region_index_t = typename 
RegionCoords_t::region_index_t; - const auto& bnd_elems = haloblock.boundary_views(); + const auto& bnd_elems = haloblock.boundary_views(); const auto& halo_ext_max = haloblock.halo_extension_max(); _boundary_views.reserve(NumDimensions * 2); auto it_views = std::begin(bnd_elems); for(dim_t d = 0; d < NumDimensions; ++d) { - region_index_t index = RegionCoords_t::index(d, RegionPos::PRE); - auto* region = haloblock.boundary_region(index); + region_index_t index = RegionCoords_t::index(d, RegionPos::PRE); + auto* region = haloblock.boundary_region(index); if(region == nullptr || (region != nullptr && region->size() == 0)) _boundary_views.push_back(ViewSpec_t()); else { push_boundary_views(*it_views, halo_ext_max, minmax_dist); ++it_views; } - index = RegionCoords_t::index(d, RegionPos::POST); + index = RegionCoords_t::index(d, RegionPos::POST); region = haloblock.boundary_region(index); if(region == nullptr || (region != nullptr && region->size() == 0)) _boundary_views.push_back(ViewSpec_t()); @@ -74,23 +103,37 @@ class StencilSpecificViews { } } - const ViewSpec_t& local() const { return *_view_local; } + /** + * Returns \ref ViewSpec including all elements (locally) + */ + const ViewSpec_t& view() const { return *_view_local; } + /** + * Returns \ref ViewSpec including all inner elements + */ const ViewSpec_t& inner() const { return _view_inner; } + /** + * Returns \ref ViewSpec including all inner and boundary elements + */ const ViewSpec_t& inner_with_boundaries() const { return _view_inner_with_boundaries; } - const BoundaryViews_t& boundary_views() const { - return _boundary_views; - } + /** + * Returns all boundary views including all boundary elements (no duplicates) + */ + const BoundaryViews_t& boundary_views() const { return _boundary_views; } - pattern_size_t boundary_size() const { return _size_bnd_elems;} + /** + * Returns the number of all boundary elements (no duplicates) + */ + pattern_size_t boundary_size() const { return _size_bnd_elems; }
private: - template - void push_boundary_views(const ViewSpec_t& view, const MaxExtT& max_ext, const MaxDistT& max_dist) { + template + void push_boundary_views(const ViewSpec_t& view, const MaxExtT& max_ext, + const MaxDistT& max_dist) { auto view_off = view.offsets(); auto view_ext = view.extents(); for(auto d = 0; d < NumDimensions; ++d) { @@ -101,7 +144,8 @@ class StencilSpecificViews { view_off[d] += max_ext[d].second - max_dist[d].second; } else { resize_offset(view_off[d], view_ext[d], max_dist[d].first); - resize_extent(view_off[d], view_ext[d], _view_local->extent(d), max_dist[d].second); + resize_extent(view_off[d], view_ext[d], _view_local->extent(d), + max_dist[d].second); } } ViewSpec_t tmp(view_off, view_ext); @@ -109,7 +153,7 @@ class StencilSpecificViews { _boundary_views.push_back(std::move(tmp)); } - template + template void resize_offset(OffT& offset, ExtT& extent, MaxT max) { if(offset > max) { extent += offset - max; @@ -117,7 +161,7 @@ class StencilSpecificViews { } } - template + template void resize_extent(OffT& offset, ExtT& extent, ExtT extent_local, MinT max) { auto diff_ext = extent_local - offset - extent; if(diff_ext > max) @@ -125,32 +169,32 @@ class StencilSpecificViews { } private: - const ViewSpec_t* _view_local; - ViewSpec_t _view_inner; - ViewSpec_t _view_inner_with_boundaries; - BoundaryViews_t _boundary_views; - pattern_size_t _size_bnd_elems = 0; + const ViewSpec_t* _view_local; + ViewSpec_t _view_inner; + ViewSpec_t _view_inner_with_boundaries; + BoundaryViews_t _boundary_views; + pattern_size_t _size_bnd_elems = 0; }; template std::ostream& operator<<( - std::ostream & os, - const StencilSpecificViews & stencil_views) -{ + std::ostream& os, + const StencilSpecificViews& stencil_views) { std::ostringstream ss; - ss << "dash::StencilSpecificViews" + ss << "dash::halo::StencilSpecificViews" << "(local: " << stencil_views.local() << "; inner: " << stencil_views.inner() << "; inner_bound: " << 
stencil_views.inner_with_boundaries() << "; boundary_views: " << stencil_views.boundary_views() - << "; boundary elems: " << stencil_views.boundary_size() - << ")"; + << "; boundary elems: " << stencil_views.boundary_size() << ")"; return operator<<(os, ss.str()); } /* - * Iterator with stencil points and halo access \see HaloStencilOperator. + * Stencil specific iterator to iterate over a given scope of elements. + * The iterator provides element access via stencil points and for boundary + * elements halo element access. */ template @@ -162,8 +206,8 @@ class StencilIterator { static constexpr auto FastestDimension = MemoryArrange == ROW_MAJOR ? NumDimensions - 1 : 0; - using Self_t = StencilIterator; - using ViewSpec_t = typename PatternT::viewspec_type; + using Self_t = StencilIterator; + using ViewSpec_t = typename PatternT::viewspec_type; using pattern_size_t = typename PatternT::size_type; using signed_pattern_size_t = typename std::make_signed::type; using RegionCoords_t = RegionCoords; @@ -185,8 +229,8 @@ class StencilIterator { using StencilP_t = StencilPoint; using ElementCoords_t = std::array; using StencilOffsets_t = std::array; - using StencilSpecViews_t = StencilSpecificViews; - using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; + using StencilSpecViews_t = StencilSpecificViews; + using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; public: /** @@ -196,20 +240,19 @@ class StencilIterator { * \param halomemory \ref HaloMemory instance for loacl halo memory * \param stencil_spec \ref StencilSpec to use * \param stencil_offsets stencil offsets for every stencil point - * \param spec_views stencil specific views \ref StencilSpecViews + * \param view_local local \ref SpecView including all local elements + * \param view_scope \ref ViewSpec to use * \param idx position of the iterator */ StencilIterator(ElementT* local_memory, HaloMemory_t* halomemory, - const StencilSpecT* stencil_spec, - const StencilOffsets_t* 
stencil_offsets, - const ViewSpec_t& view, - const ViewSpec_t& view_local, - pattern_index_t idx) + const StencilSpecT* stencil_spec, + const StencilOffsets_t* stencil_offsets, + const ViewSpec_t& view_local, const ViewSpec_t& view_scope, + pattern_index_t idx) : _halomemory(halomemory), _stencil_spec(stencil_spec), - _stencil_offsets(stencil_offsets), _view(view), + _stencil_offsets(stencil_offsets), _view(view_scope), _local_memory(local_memory), _idx(idx), _local_layout(view_local.extents()) { - if(_idx < _view.size()) set_coords(); @@ -220,22 +263,31 @@ class StencilIterator { _local_layout.extent(FastestDimension) - ext_max.second - 1); } else { _ext_dim_reduced = - std::make_pair(std::abs(ext_max.first), _view.extent(FastestDimension) - - ext_max.second - 1); + std::make_pair(std::abs(ext_max.first), + _view.extent(FastestDimension) - ext_max.second - 1); } } + /** + * Constructor + * + * \param local_memory Pointer to the begining of the local NArray memory + * \param halomemory \ref HaloMemory instance for halo memory elements + * \param stencil_spec \ref StencilSpec to use + * \param stencil_offsets stencil offsets for every stencil point + * \param view_local local \ref SpecView including all local elements + * \param boundary_views all relevant boundary views + * \param idx position of the iterator + */ StencilIterator(ElementT* local_memory, HaloMemory_t* halomemory, - const StencilSpecT* stencil_spec, - const StencilOffsets_t* stencil_offsets, - const ViewSpec_t& view_local, - const BoundaryViews_t& boundary_views, - pattern_index_t idx) + const StencilSpecT* stencil_spec, + const StencilOffsets_t* stencil_offsets, + const ViewSpec_t& view_local, + const BoundaryViews_t& boundary_views, pattern_index_t idx) : _halomemory(halomemory), _stencil_spec(stencil_spec), _stencil_offsets(stencil_offsets), _boundary_views(boundary_views), _view(view_local.extents()), _local_memory(local_memory), _idx(idx), _local_layout(view_local.extents()) { - pattern_index_t 
size = 0; for(const auto& view : boundary_views) size += view.size(); @@ -245,8 +297,8 @@ class StencilIterator { const auto ext_max = stencil_spec->minmax_distances(FastestDimension); _ext_dim_reduced = - std::make_pair(std::abs(ext_max.first), _view.extent(FastestDimension) - - ext_max.second - 1); + std::make_pair(std::abs(ext_max.first), + _view.extent(FastestDimension) - ext_max.second - 1); } /** @@ -397,9 +449,7 @@ class StencilIterator { return res; } - difference_type operator-(Self_t& other) const { - return _idx - other._idx; - } + difference_type operator-(Self_t& other) const { return _idx - other._idx; } bool operator<(const Self_t& other) const { return compare(other, std::less()); @@ -439,8 +489,7 @@ class StencilIterator { return true; } #endif - if(&_view == &(other._view) - || _view == other._view) { + if(&_view == &(other._view) || _view == other._view) { return gidx_cmp(_idx, other._idx); } // TODO not the best solution @@ -559,7 +608,7 @@ class StencilIterator { return; _region_bound += _boundary_views[_region_number].size(); - } while (_idx >= _region_bound); + } while(_idx >= _region_bound); _coords = _local_layout.coords(0, _boundary_views[_region_number]); } } @@ -581,7 +630,7 @@ class StencilIterator { } _current_lmemory_addr = _local_memory + _offset; - //setup stencil point offsets + // setup stencil point offsets if(Scope == StencilViewScope::INNER) { for(auto i = 0; i < NumStencilPoints; ++i) _stencil_mem_ptr[i] = _current_lmemory_addr + (*_stencil_offsets)[i]; diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index b281b1ef9..9f09dcf86 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -256,10 +256,10 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) dash::Array sum_halo(dash::size()); dash::fill(sum_halo.begin(), sum_halo.end(),0); auto* sum_local = sum_halo.lbegin(); - auto stencil_op = halo_wrapper.stencil_operator(stencil_spec); - auto it_iend = stencil_op.iend(); - for(auto it = 
stencil_op.ibegin(); it != it_iend; ++it) { + + auto it_iend = stencil_op.inner.end(); + for(auto it = stencil_op.inner.begin(); it != it_iend; ++it) { for(auto i = 0; i < stencil_spec.num_stencil_points(); ++i) *sum_local += it.value_at(i); @@ -267,8 +267,8 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) } halo_wrapper.update(); - auto it_bend = stencil_op.bend(); - for(auto it = stencil_op.bbegin(); it != it_bend; ++it) { + auto it_bend = stencil_op.boundary.end(); + for(auto it = stencil_op.boundary.begin(); it != it_bend; ++it) { for(auto i = 0; i < stencil_spec.num_stencil_points(); ++i) *sum_local += it.value_at(i); @@ -323,8 +323,8 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, b dash::fill(sum_halo.begin(), sum_halo.end(),0); auto* sum_local = sum_halo.lbegin(); - auto it_iend = stencil_op.iend(); - for(auto it = stencil_op.ibegin(); it != it_iend; ++it) { + auto it_iend = stencil_op.inner.end(); + for(auto it = stencil_op.inner.begin(); it != it_iend; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(i); @@ -335,14 +335,14 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, b if(region_wise) { for( auto d = 0; d < 3; ++d) { - auto it_bnd = stencil_op.boundary_iterator_at(d, RegionPos::PRE); + auto it_bnd = stencil_op.boundary.iterator_at(d, RegionPos::PRE); for(auto it = it_bnd.first; it != it_bnd.second; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(i); *sum_local += *it; } - auto it_bnd_2 = stencil_op.boundary_iterator_at(d, RegionPos::POST); + auto it_bnd_2 = stencil_op.boundary.iterator_at(d, RegionPos::POST); for(auto it = it_bnd_2.first; it != it_bnd_2.second; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(i); @@ -351,8 +351,8 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, b } } } else { - auto it_bend = stencil_op.bend(); - for(auto it = 
stencil_op.bbegin(); it != it_bend; ++it) { + auto it_bend = stencil_op.boundary.end(); + for(auto it = stencil_op.boundary.begin(); it != it_bend; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(i); @@ -382,8 +382,8 @@ unsigned long calc_sum_halo_via_stencil(HaloWrapperT& halo_wrapper, StencilOpT s dash::fill(sum_halo.begin(), sum_halo.end(),0); auto* sum_local = sum_halo.lbegin(); - auto it_iend = stencil_op.iend(); - for(auto it = stencil_op.ibegin(); it != it_iend; ++it) { + auto it_iend = stencil_op.inner.end(); + for(auto it = stencil_op.inner.begin(); it != it_iend; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(stencil_spec[i]); @@ -392,8 +392,8 @@ unsigned long calc_sum_halo_via_stencil(HaloWrapperT& halo_wrapper, StencilOpT s halo_wrapper.wait(); - auto it_bend = stencil_op.bend(); - for(auto it = stencil_op.bbegin(); it != it_bend; ++it) { + auto it_bend = stencil_op.boundary.end(); + for(auto it = stencil_op.boundary.begin(); it != it_bend; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(stencil_spec[i]); From f6ba5a7aef540648306df92723fff86bbbcbfe5c Mon Sep 17 00:00:00 2001 From: Denis Huenich Date: Thu, 19 Apr 2018 14:14:27 +0200 Subject: [PATCH 4/6] removed HaloStencilIterator.h --- .../dash/halo/iterator/HaloStencilIterator.h | 517 ------------------ 1 file changed, 517 deletions(-) delete mode 100644 dash/include/dash/halo/iterator/HaloStencilIterator.h diff --git a/dash/include/dash/halo/iterator/HaloStencilIterator.h b/dash/include/dash/halo/iterator/HaloStencilIterator.h deleted file mode 100644 index 8b8e439cd..000000000 --- a/dash/include/dash/halo/iterator/HaloStencilIterator.h +++ /dev/null @@ -1,517 +0,0 @@ -#ifndef DASH__HALO__ITERATOR__HALOSTENCILITERATOR_H -#define DASH__HALO__ITERATOR__HALOSTENCILITERATOR_H - -#include - -#include - -#include - -namespace dash { - -enum class StencilViewScope : std::uint8_t { INNER, BOUNDARY, ALL }; -/* - * 
Iterator with stencil points and halo access \see HaloStencilOperator. - */ -template -class HaloStencilIterator { -private: - static constexpr auto NumDimensions = PatternT::ndim(); - static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); - static constexpr auto MemoryArrange = PatternT::memory_order(); - static constexpr auto FastestDimension = - MemoryArrange == ROW_MAJOR ? NumDimensions - 1 : 0; - - using Self_t = HaloStencilIterator; - using ViewSpec_t = typename PatternT::viewspec_type; - using pattern_size_t = typename PatternT::size_type; - using signed_pattern_size_t = typename std::make_signed::type; - using RegionCoords_t = RegionCoords; - -public: - // Iterator traits - using iterator_category = std::random_access_iterator_tag; - using value_type = ElementT; - using difference_type = typename PatternT::index_type; - using pointer = ElementT*; - using reference = ElementT&; - - using HaloBlock_t = HaloBlock; - using HaloMemory_t = HaloMemory; - using pattern_index_t = typename PatternT::index_type; - using region_index_t = typename RegionCoords_t::region_index_t; - using LocalLayout_t = - CartesianIndexSpace; - using StencilP_t = StencilPoint; - using ElementCoords_t = std::array; - using StencilOffsets_t = std::array; - -public: - /** - * Constructor - * - * \param haloblock \ref HaloBlock instance to use - * \param stencil_spec \ref StencilSpec to use - * \param stencil_offsets stencil offsets for every stencil point - * \param idx position of the iterator - */ - HaloStencilIterator(const HaloBlock_t& haloblock, HaloMemory_t& halomemory, - const StencilSpecT& stencil_spec, - const StencilOffsets_t& stencil_offsets, - pattern_index_t idx) - : _haloblock(haloblock), _halomemory(halomemory), _stencil_spec(stencil_spec), - _stencil_offsets(stencil_offsets), - _local_memory((ElementT*) _haloblock.globmem().lbegin()), - _local_layout(_haloblock.pattern().local_memory_layout()), _idx(idx) { - if(Scope == StencilViewScope::INNER) - 
set_view_local(_haloblock.view_inner()); - - if(Scope == StencilViewScope::ALL) - set_view_local(_haloblock.view_inner_with_boundaries()); - - if(Scope == StencilViewScope::BOUNDARY) - set_view_local(_haloblock.view()); - - pattern_index_t _size = 0; - if(Scope == StencilViewScope::BOUNDARY) - _size = _haloblock.boundary_size(); - else - _size = _view_local.size(); - - if(_idx < _size) - set_coords(); - - const auto& ext_max = haloblock.halo_extension_max(FastestDimension); - if(Scope == StencilViewScope::INNER) { - _ext_dim_reduced = std::make_pair( - _view_local.offset(FastestDimension), - _local_layout.extent(FastestDimension) - ext_max.second - 1); - } else { - _ext_dim_reduced = - std::make_pair(ext_max.first, _view_local.extent(FastestDimension) - - ext_max.second - 1); - } - } - - /** - * Copy constructor. - */ - HaloStencilIterator(const Self_t& other) = default; - - /** - * Assignment operator. - * - * \see DashGlobalIteratorConcept - */ - Self_t& operator=(const Self_t& other) = default; - - /** - * The number of dimensions of the iterator's underlying pattern. - * - * \see DashGlobalIteratorConcept - */ - static constexpr dim_t ndim() { return NumDimensions; } - - /** - * Dereference operator. - * - * \return A global reference to the element at the iterator's position. - */ - reference operator*() const { return *_current_lmemory_addr; } - - /** - * Subscript operator, returns global reference to element at given - * global index. 
- * - * \see DashGlobalIteratorConcept - */ - reference operator[](pattern_index_t n) const { - auto coords = set_coords(_idx + n); - return _local_memory[_local_layout.at(coords)]; - } - - pattern_index_t rpos() const { return _idx; } - - pattern_index_t lpos() const { return _offset; } - - const ElementCoords_t& coords() const { return _coords; }; - - bool is_halo_value(const region_index_t index_stencil) { - if(Scope == StencilViewScope::INNER) - return false; - - auto halo_coords = _coords; - const auto& stencil = _stencil_spec[index_stencil]; - for(auto d = 0; d < NumDimensions; ++d) { - halo_coords[d] += stencil[d]; - if(halo_coords[d] < 0 || halo_coords[d] >= _haloblock.view().extent(d)) - return true; - } - - return false; - } - - /** - * Returns the value for a given stencil point index (index postion in - * \ref StencilSpec) - */ - ElementT value_at(const region_index_t index_stencil) { - return *(_stencil_mem_ptr[index_stencil]); - } - - /* returns the value of a given stencil point (not as efficient as - * stencil point index ) - */ - ElementT value_at(const StencilP_t& stencil) { - auto index_stencil = _stencil_spec.index(stencil); - - DASH_ASSERT_MSG(index_stencil.second, - "No valid region index for given stencil point found"); - - return value_at(index_stencil.first); - } - - /** - * Prefix increment operator. - */ - Self_t& operator++() { - ++_idx; - next_element(); - - return *this; - } - - /** - * Postfix increment operator. - */ - Self_t operator++(int) { - Self_t result = *this; - ++_idx; - next_element(); - - return result; - } - - /** - * Prefix decrement operator. - */ - Self_t& operator--() { - --_idx; - set_coords(); - - return *this; - } - - /** - * Postfix decrement operator. 
- */ - Self_t operator--(int) { - Self_t result = *this; - --_idx; - set_coords(); - - return result; - } - - Self_t& operator+=(pattern_index_t n) { - _idx += n; - set_coords(); - - return *this; - } - - Self_t& operator-=(pattern_index_t n) { - _idx -= n; - set_coords(); - - return *this; - } - - Self_t operator+(pattern_index_t n) const { - Self_t res{ *this }; - res += n; - - return res; - } - - Self_t operator-(pattern_index_t n) const { - Self_t res{ *this }; - res -= n; - - return res; - } - - bool operator<(const Self_t& other) const { - return compare(other, std::less()); - } - - bool operator<=(const Self_t& other) const { - return compare(other, std::less_equal()); - } - - bool operator>(const Self_t& other) const { - return compare(other, std::greater()); - } - - bool operator>=(const Self_t& other) const { - return compare(other, std::greater_equal()); - } - - bool operator==(const Self_t& other) const { - return compare(other, std::equal_to()); - } - - bool operator!=(const Self_t& other) const { - return compare(other, std::not_equal_to()); - } - -private: - /** - * Compare position of this global iterator to the position of another - * global iterator with respect to viewspec projection. 
- */ - template - bool compare(const Self_t& other, const GlobIndexCmpFunc& gidx_cmp) const { -#if __REMARK__ - // Usually this is a best practice check, but it's an infrequent case - // so we rather avoid this comparison: - if(this == &other) { - return true; - } -#endif - if(&_view_local == &(other._view_local) - || _view_local == other._view_local) { - return gidx_cmp(_idx, other._idx); - } - // TODO not the best solution - return false; - } - - void set_view_local(const ViewSpec_t& view_tmp) { - if(Scope == StencilViewScope::BOUNDARY) { - const auto& bnd_elems = _haloblock.boundary_elements(); - _bnd_elements.reserve(bnd_elems.size()); - const auto& view_offs = view_tmp.offsets(); - for(const auto& region : bnd_elems) { - auto off = region.offsets(); - for(int d = 0; d < NumDimensions; ++d) - off[d] -= view_offs[d]; - - _bnd_elements.push_back(ViewSpec_t(off, region.extents())); - } - - _view_local = ViewSpec_t(view_tmp.extents()); - } else { - const auto& view_offsets = _haloblock.view().offsets(); - auto off = view_tmp.offsets(); - for(int d = 0; d < NumDimensions; ++d) - off[d] -= view_offsets[d]; - - _view_local = ViewSpec_t(off, view_tmp.extents()); - } - } - - void next_element() { - const auto& coord_fastest_dim = _coords[FastestDimension]; - - if(coord_fastest_dim >= _ext_dim_reduced.first - && coord_fastest_dim < _ext_dim_reduced.second) { - for(auto it = _stencil_mem_ptr.begin(); it != _stencil_mem_ptr.end(); - ++it) - *it += 1; - - ++_coords[FastestDimension]; - ++_current_lmemory_addr; - ++_offset; - - return; - } - - if(Scope == StencilViewScope::INNER) { - if(MemoryArrange == ROW_MAJOR) { - for(auto i = NumDimensions - 1; i >= 0; --i) { - if(_coords[i] < _view_local.extent(i) + _view_local.offset(i) - 1) { - ++_coords[i]; - break; - } else - _coords[i] = _view_local.offset(i); - } - } else { - for(auto i = 0; i < NumDimensions; ++i) { - if(_coords[i] < _view_local.extent(i) + _view_local.offset(i) - 1) { - ++_coords[i]; - break; - } else - 
_coords[i] = _view_local.offset(i); - } - } - if(MemoryArrange == ROW_MAJOR) { - _offset = _coords[0]; - for(auto d = 1; d < NumDimensions; ++d) - _offset = _offset * _local_layout.extent(d) + _coords[d]; - } else { - _offset = _coords[NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) - _offset = _offset * _local_layout.extent(d) + _coords[d]; - } - _current_lmemory_addr = _local_memory + _offset; - for(auto i = 0; i < NumStencilPoints; ++i) - _stencil_mem_ptr[i] = _current_lmemory_addr + _stencil_offsets[i]; - } else - set_coords(); - } - - void set_coords() { - if(Scope == StencilViewScope::BOUNDARY) { - if(_region_bound == 0) { - _coords = set_coords(_idx); - } else { - if(_idx < _region_bound) { - const auto& region = _bnd_elements[_region_number]; - if(MemoryArrange == ROW_MAJOR) { - for(auto i = NumDimensions - 1; i >= 0; --i) { - if(_coords[i] < region.extent(i) + region.offset(i) - 1) { - ++_coords[i]; - break; - } else - _coords[i] = region.offset(i); - } - } else { - for(auto i = 0; i < NumDimensions; ++i) { - if(_coords[i] < region.extent(i) + region.offset(i) - 1) { - ++_coords[i]; - break; - } else - _coords[i] = region.offset(i); - } - } - } else { - ++_region_number; - if(_region_number < _bnd_elements.size()) { - _region_bound += _bnd_elements[_region_number].size(); - _coords = _local_layout.coords(0, _bnd_elements[_region_number]); - } - } - } - } else { - _coords = set_coords(_idx); - } - - if(MemoryArrange == ROW_MAJOR) { - _offset = _coords[0]; - for(auto d = 1; d < NumDimensions; ++d) - _offset = _offset * _local_layout.extent(d) + _coords[d]; - } else { - _offset = _coords[NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) - _offset = _offset * _local_layout.extent(d) + _coords[d]; - } - _current_lmemory_addr = _local_memory + _offset; - if(Scope == StencilViewScope::INNER) { - for(auto i = 0; i < NumStencilPoints; ++i) - _stencil_mem_ptr[i] = _current_lmemory_addr + _stencil_offsets[i]; - } else { - using 
signed_extent_t = typename std::make_signed::type; - std::array halo_coords{}; - std::array is_halo{}; - std::array indexes{}; - for(auto d = 0; d < NumDimensions; ++d) { - auto extent = _haloblock.view().extent(d); - - for(auto i = 0; i < NumStencilPoints; ++i) { - auto& halo_coord = halo_coords[i][d]; - halo_coord = _coords[d] + _stencil_spec[i][d]; - if(halo_coord < 0) { - indexes[i] *= RegionCoords_t::REGION_INDEX_BASE; - is_halo[i] = true; - continue; - } - - if(halo_coord < static_cast(extent)) { - indexes[i] = 1 + indexes[i] * RegionCoords_t::REGION_INDEX_BASE; - continue; - } - - indexes[i] = 2 + indexes[i] * RegionCoords_t::REGION_INDEX_BASE; - is_halo[i] = true; - } - } - for(auto i = 0; i < NumStencilPoints; ++i) { - if(is_halo[i]) - _stencil_mem_ptr[i] = value_halo_at(indexes[i], halo_coords[i]); - else - _stencil_mem_ptr[i] = _current_lmemory_addr + _stencil_offsets[i]; - } - } - } - - std::array set_coords(pattern_index_t idx) { - if(Scope == StencilViewScope::BOUNDARY) { - auto local_idx = idx; - for(const auto& region : _bnd_elements) { - _region_bound += region.size(); - if(local_idx < region.size()) { - return _local_layout.coords(local_idx, region); - } - ++_region_number; - local_idx -= region.size(); - } - DASH_ASSERT("idx >= size not implemented yet"); - return std::array{}; - } else { - if(_view_local.size() == 0) - return std::array{}; - else - return _local_layout.coords(idx, _view_local); - } - } - - ElementT* value_halo_at(region_index_t region_index, - ElementCoords_t& halo_coords) { - _halomemory.to_halo_mem_coords(region_index, halo_coords); - - return _halomemory.pos_at(region_index) - + _halomemory.offset(region_index, halo_coords); - } - - void set_stencil_offsets(const StencilSpecT& stencil_spec) { - for(auto i = 0; i < NumStencilPoints; ++i) { - signed_pattern_size_t offset = 0; - if(MemoryArrange == ROW_MAJOR) { - offset = stencil_spec[i][0]; - for(auto d = 1; d < NumDimensions; ++d) - offset = stencil_spec[i][d] + offset * 
_local_layout.extent(d); - } else { - offset = stencil_spec[i][NumDimensions - 1]; - for(auto d = NumDimensions - 2; d >= 0; --d) - offset = stencil_spec[i][d] + offset * _local_layout.extent(d); - } - _stencil_offsets[i] = offset; - } - } - -private: - const HaloBlock_t& _haloblock; - HaloMemory_t& _halomemory; - const StencilSpecT& _stencil_spec; - const StencilOffsets_t& _stencil_offsets; - ElementT* _local_memory; - ViewSpec_t _view_local; - std::vector _bnd_elements; - std::array _stencil_mem_ptr; - const LocalLayout_t& _local_layout; - pattern_index_t _idx{ 0 }; - // extension of the fastest index dimension minus the halo extension - std::pair _ext_dim_reduced; - signed_pattern_size_t _offset; - pattern_index_t _region_bound{ 0 }; - size_t _region_number{ 0 }; - ElementCoords_t _coords; - ElementT* _current_lmemory_addr; -}; // class HaloStencilIterator - -} // namespace dash - -#endif // DASH__HALO__ITERATOR__HALOSTENCILITERATOR_H - From fb3a9bb28577cb6fb80f6f205a44db68bae922a1 Mon Sep 17 00:00:00 2001 From: Denis Huenich Date: Fri, 20 Apr 2018 16:10:09 +0200 Subject: [PATCH 5/6] HaloMemory returns now iterator, mino fixes --- dash/include/dash/halo/Halo.h | 73 +++++++++++++++---- dash/include/dash/halo/HaloMatrixWrapper.h | 63 ++++++++-------- .../dash/halo/iterator/StencilIterator.h | 4 +- 3 files changed, 93 insertions(+), 47 deletions(-) diff --git a/dash/include/dash/halo/Halo.h b/dash/include/dash/halo/Halo.h index cfd0761ab..0910d55c4 100644 --- a/dash/include/dash/halo/Halo.h +++ b/dash/include/dash/halo/Halo.h @@ -405,6 +405,9 @@ class RegionCoords : public Dimensional { */ constexpr region_index_t index() const { return _index; } + /** + * Returns a region index for a given dimension and \ref RegionPos + */ static region_index_t index(dim_t dim, RegionPos pos) { region_coord_t coord = (pos == RegionPos::PRE) ? 
0 : 2; @@ -837,7 +840,7 @@ class RegionIter { GlobIter global() const { auto g_idx = gpos(); - return GlobIter(_globmem, &_pattern, g_idx); + return GlobIter(_globmem, *_pattern, g_idx); } ElementT* local() const { @@ -1571,46 +1574,84 @@ class HaloMemory { using Element_t = typename HaloBlockT::Element_t; using ElementCoords_t = std::array; + using HaloBuffer_t = std::vector; using region_index_t = typename RegionCoords_t::region_index_t; using pattern_size_t = typename Pattern_t::size_type; + using iterator = typename HaloBuffer_t::iterator; + using const_iterator = const iterator; + + using MemRange_t = std::pair; + public: /** * Constructor */ HaloMemory(const HaloBlockT& haloblock) : _haloblock(haloblock) { _halobuffer.resize(haloblock.halo_size()); - auto* offset = _halobuffer.data(); + auto it = _halobuffer.begin(); for(const auto& region : haloblock.halo_regions()) { - _halo_offsets[region.index()] = offset; - offset += region.size(); + _halo_offsets[region.index()] = it; + it += region.size(); } } /** - * Pointer to the first halo element for the given region index + * Iterator to the first halo element for the given region index * \param index halo region index - * \return Pointer to the first halo element or nullptr if the - * region doesn't exist + * \return Iterator to the first halo element. If no region exists the + * end iterator will be returned. */ - Element_t* first_element_at(region_index_t index) { + iterator first_element_at(region_index_t index) { return _halo_offsets[index]; } /** - * Pointer to the first halo element - * - * \return Pointer to the first halo element + * Returns the range of all halo elements for the given region index. + * \param index halo region index + * \return Pair of iterators. First points to the beginning and second to the + * end.
+ */ + MemRange_t range_at(region_index_t index) { + auto it = _halo_offsets[index]; + if(it == _halobuffer.end()) + return std::make_pair(it,it); + + auto* region = _haloblock.halo_region(index); + + DASH_ASSERT_MSG(region != nullptr, + "HaloMemory manages memory for a region that seemed to be empty."); + + return std::make_pair(it, it + region->size()); + } + + /** + * Returns an iterator to the first halo element + */ + iterator begin() { return _halobuffer.begin(); } + + /** + * Returns a const iterator to the first halo element + */ + const_iterator begin() const { return _halobuffer.begin(); } + + /** + * Returns an iterator to the end of the halo elements + */ + iterator end() { return _halobuffer.end(); } + + /** + * Returns a const iterator to the end of the halo elements */ - Element_t* first_element() { return _halobuffer.data(); } + const_iterator end() const { return _halobuffer.end(); } /** * Container storing all halo elements * * \return Reference to the container storing all halo elements */ - const std::vector& buffer() const { return _halobuffer; } + const HaloBuffer_t& buffer() const { return _halobuffer; } /** * Converts coordinates to halo memory coordinates for a given @@ -1677,9 +1718,9 @@ class HaloMemory { } private: - const HaloBlockT& _haloblock; - std::vector _halobuffer; - std::array _halo_offsets{}; + const HaloBlockT& _haloblock; + HaloBuffer_t _halobuffer; + std::array _halo_offsets{}; }; // class HaloMemory } // namespace halo diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index 0f993e98d..a9b3dfe9b 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -93,7 +93,7 @@ class HaloMatrixWrapper { pattern_size_t num_elems_block = 1; auto rel_dim = region.spec().relevant_dim(); auto level = region.spec().level(); - auto* off = _halomemory.first_element_at(region.index()); + auto* off = 
&*(_halomemory.first_element_at(region.index())); auto it = region.begin(); if(MemoryArrange == ROW_MAJOR) { @@ -266,8 +266,8 @@ class HaloMatrixWrapper { void update() { for(auto& region : _region_data) { update_halo_intern(region.second); - dart_wait_local(&region.second.handle); } + wait(); } /** @@ -296,8 +296,9 @@ class HaloMatrixWrapper { */ void update_async_at(region_index_t index) { auto it_find = _region_data.find(index); - if(it_find != _region_data.end()) + if(it_find != _region_data.end()) { update_halo_intern(it_find->second); + } } /** @@ -305,8 +306,9 @@ class HaloMatrixWrapper { * halo updates. */ void wait() { - for(auto& region : _region_data) + for(auto& region : _region_data) { dart_wait_local(&region.second.handle); + } } /** @@ -372,7 +374,6 @@ class HaloMatrixWrapper { using signed_extent_t = typename std::make_signed::type; for(const auto& region : _haloblock.boundary_regions()) { if(region.is_custom_region()) { - auto* pos_ptr = _halomemory.first_element_at(region.index()); const auto& spec = region.spec(); std::array coords_offset{}; const auto& reg_ext = region.view().extents(); @@ -385,12 +386,20 @@ class HaloMatrixWrapper { coords_offset[d] = reg_ext[d]; } - auto it_reg_end = region.end(); - for(auto it = region.begin(); it != it_reg_end; ++it) { + auto range_mem = _halomemory.range_at(region.index()); + auto it_mem = range_mem.first; + auto it_reg_end = region.end(); + DASH_ASSERT_MSG( + std::distance(range_mem.first, range_mem.second) == region.size(), + "Range distance of the HaloMemory is unequal region size"); + + for(auto it = region.begin(); it != it_reg_end; ++it, ++it_mem) { auto coords = it.gcoords(); - for(auto d = 0; d < NumDimensions; ++d) + for(auto d = 0; d < NumDimensions; ++d) { coords[d] += coords_offset[d]; - *(pos_ptr + it.rpos()) = f(coords); + } + + *it_mem = f(coords); } } } @@ -401,21 +410,13 @@ class HaloMatrixWrapper { * element exists.
This also means that only a unit connected to the given * coordinate will return a halo value. All others will return nullptr. */ - Element_t* halo_element_at_global(ElementCoords_t coords) { + Element_t* halo_element_at_global(const ElementCoords_t coords) { const auto& offsets = _view_global.offsets(); for(auto d = 0; d < NumDimensions; ++d) { coords[d] -= offsets[d]; } - auto index = _haloblock.index_at(_view_local, coords); - const auto& spec = _halo_spec.spec(index); - auto* halomem_pos = _halomemory.first_element_at(index); - if(spec.level() == 0 || halomem_pos == nullptr) - return nullptr; - - if(!_halomemory.to_halo_mem_coords_check(index, coords)) - return nullptr; - return halomem_pos + _halomemory.offset(index, coords); + return halo_element_at(coords); } /** @@ -423,16 +424,7 @@ class HaloMatrixWrapper { * element exists. */ Element_t* halo_element_at_local(ElementCoords_t coords) { - auto index = _haloblock.index_at(_view_local, coords); - const auto& spec = _halo_spec.spec(index); - auto* halomem_pos = _halomemory.first_element_at(index); - if(spec.level() == 0 || halomem_pos == nullptr) - return nullptr; - - if(!_halomemory.to_halo_mem_coords_check(index, coords)) - return nullptr; - - return halomem_pos + _halomemory.offset(index, coords); + return halo_element_at(coords); } /** @@ -467,6 +459,19 @@ class HaloMatrixWrapper { data.get_halos(data.handle); } + Element_t* halo_element_at(ElementCoords_t& coords) { + auto index = _haloblock.index_at(_view_local, coords); + const auto& spec = _halo_spec.spec(index); + auto range_mem = _halomemory.range_at(index); + if(spec.level() == 0 || range_mem.first == range_mem.second) + return nullptr; + + if(!_halomemory.to_halo_mem_coords_check(index, coords)) + return nullptr; + + return &*(range_mem.first + _halomemory.offset(index, coords)); + } + private: MatrixT& _matrix; const GlobBoundSpec_t _cycle_spec; diff --git a/dash/include/dash/halo/iterator/StencilIterator.h 
b/dash/include/dash/halo/iterator/StencilIterator.h index f4e6d6aa3..e387306b4 100644 --- a/dash/include/dash/halo/iterator/StencilIterator.h +++ b/dash/include/dash/halo/iterator/StencilIterator.h @@ -695,8 +695,8 @@ class StencilIterator { ElementCoords_t& halo_coords) { _halomemory->to_halo_mem_coords(region_index, halo_coords); - return _halomemory->first_element_at(region_index) - + _halomemory->offset(region_index, halo_coords); + return &*(_halomemory->first_element_at(region_index) + + _halomemory->offset(region_index, halo_coords)); } void set_stencil_offsets(const StencilSpecT& stencil_spec) { From a5d493e1e31433b33885b21e3bbc35983bbdefda Mon Sep 17 00:00:00 2001 From: Denis Huenich Date: Fri, 20 Apr 2018 16:14:52 +0200 Subject: [PATCH 6/6] added braces --- dash/include/dash/halo/HaloMatrixWrapper.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index a9b3dfe9b..8c0929478 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -286,8 +286,9 @@ class HaloMatrixWrapper { * Initiates an asychronous halo region update for all halo elements. */ void update_async() { - for(auto& region : _region_data) + for(auto& region : _region_data) { update_halo_intern(region.second); + } } /**