From 9d719762758c0a789f4dbdc05dd6acbc580ac4a9 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 8 Nov 2024 15:30:29 -0800 Subject: [PATCH 1/5] Use AMReX FFT for IGF Solver This replaces the implementation using HeFFTe. A new runtime parameter ablastr.do_serial_fft is added. The default is false. If it's true, we use only one process to do FFT. --- .azure-pipelines.yml | 12 - .github/workflows/cuda.yml | 13 +- .github/workflows/dependencies/hip.sh | 13 - .github/workflows/hip.yml | 6 +- CMakeLists.txt | 37 --- Docs/source/install/cmake.rst | 2 - Docs/source/install/dependencies.rst | 3 +- .../open_bc_poisson_solver/CMakeLists.txt | 12 - ...puts_test_3d_open_bc_poisson_solver_heffte | 1 - GNUmakefile | 1 - .../fields/IntegratedGreenFunctionSolver.cpp | 275 +++--------------- .../machines/desktop/spack-macos-openmp.yaml | 1 - Tools/machines/desktop/spack-ubuntu-cuda.yaml | 1 - .../machines/desktop/spack-ubuntu-openmp.yaml | 1 - Tools/machines/desktop/spack-ubuntu-rocm.yaml | 1 - .../install_a100_dependencies.sh | 39 --- .../lonestar6_warpx_a100.profile.example | 2 - .../install_cpu_dependencies.sh | 39 --- .../install_gpu_dependencies.sh | 43 --- .../perlmutter_cpu_warpx.profile.example | 2 - .../perlmutter_gpu_warpx.profile.example | 2 - .../tioga-llnl/install_mi300a_dependencies.sh | 42 --- .../tioga_mi300a_warpx.profile.example | 2 - cmake/WarpXFunctions.cmake | 5 - cmake/dependencies/AMReX.cmake | 15 +- setup.py | 2 - 26 files changed, 54 insertions(+), 518 deletions(-) delete mode 100644 Examples/Tests/open_bc_poisson_solver/inputs_test_3d_open_bc_poisson_solver_heffte diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 62d8a0a424d..d22097a208f 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -38,7 +38,6 @@ jobs: # Cartesian 3D cartesian_3d: WARPX_CMAKE_FLAGS: -DWarpX_DIMS=3 -DWarpX_FFT=ON -DWarpX_PYTHON=ON - WARPX_HEFFTE: 'TRUE' # Cylindrical RZ cylindrical_rz: WARPX_CMAKE_FLAGS: -DWarpX_DIMS=RZ -DWarpX_FFT=ON -DWarpX_PYTHON=ON @@ 
-121,17 +120,6 @@ jobs: -DCMAKE_CXX_STANDARD=17 \ -Duse_cmake_find_lapack=ON -Dbuild_tests=OFF -DCMAKE_VERBOSE_MAKEFILE=ON fi - if [ "${WARPX_HEFFTE:-FALSE}" == "TRUE" ]; then - cmake-easyinstall --prefix=/usr/local git+https://github.com/icl-utk-edu/heffte.git@v2.4.0 \ - -DCMAKE_CXX_COMPILER_LAUNCHER=$(which ccache) \ - -DCMAKE_CXX_STANDARD=17 -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_ENABLE_FFTW=ON -DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_CUDA=OFF -DHeffte_ENABLE_ROCM=OFF \ - -DHeffte_ENABLE_ONEAPI=OFF -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_PYTHON=OFF -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_MAGMA=OFF \ - -DCMAKE_VERBOSE_MAKEFILE=ON - fi # Python modules required for test analysis python3 -m pip install --upgrade -r Regression/requirements.txt python3 -m pip cache purge diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index a10306789cb..b8ff4804e96 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -62,16 +62,6 @@ jobs: -DBUILD_CLI_TOOLS=OFF \ -DCMAKE_CXX_COMPILER_LAUNCHER=$(which ccache) \ -DCMAKE_VERBOSE_MAKEFILE=ON - cmake-easyinstall --prefix=/usr/local \ - git+https://github.com/icl-utk-edu/heffte.git@v2.4.0 \ - -DCMAKE_CXX_COMPILER_LAUNCHER=$(which ccache) \ - -DCMAKE_CXX_STANDARD=17 -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_ENABLE_FFTW=OFF -DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_CUDA=ON -DHeffte_ENABLE_ROCM=OFF \ - -DHeffte_ENABLE_ONEAPI=OFF -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_PYTHON=OFF -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_MAGMA=OFF \ - -DCMAKE_VERBOSE_MAKEFILE=ON - name: build WarpX run: | export CCACHE_COMPRESS=1 @@ -92,7 +82,6 @@ jobs: -DWarpX_openpmd_internal=OFF \ -DWarpX_PRECISION=SINGLE \ -DWarpX_FFT=ON \ - -DWarpX_HEFFTE=ON \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON cmake --build build_sp -j 4 @@ -137,7 +126,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" 
git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 4b703fec6c2ff983e465c8cef0cc4947231edb07 && cd - + cd ../amrex && git checkout --detach 294b6fee6f0c7f44693eac14e6b0c0702ecfd791 && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_FFT=TRUE USE_CCACHE=TRUE -j 4 ccache -s diff --git a/.github/workflows/dependencies/hip.sh b/.github/workflows/dependencies/hip.sh index 2a1b4d090bc..1154bb05e58 100755 --- a/.github/workflows/dependencies/hip.sh +++ b/.github/workflows/dependencies/hip.sh @@ -79,16 +79,3 @@ sudo curl -L -o /usr/local/bin/cmake-easyinstall https://raw.githubusercontent.c sudo chmod a+x /usr/local/bin/cmake-easyinstall export CEI_SUDO="sudo" export CEI_TMP="/tmp/cei" - -# heFFTe -# -cmake-easyinstall --prefix=/usr/local \ - git+https://github.com/icl-utk-edu/heffte.git@v2.4.0 \ - -DCMAKE_CXX_COMPILER_LAUNCHER=$(which ccache) \ - -DCMAKE_CXX_STANDARD=17 -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_ENABLE_FFTW=OFF -DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_CUDA=OFF -DHeffte_ENABLE_ROCM=ON \ - -DHeffte_ENABLE_ONEAPI=OFF -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_PYTHON=OFF -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_MAGMA=OFF \ - -DCMAKE_VERBOSE_MAKEFILE=ON diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index 8ba39de7742..6ab4e4a8401 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -61,8 +61,7 @@ jobs: -DWarpX_MPI=ON \ -DWarpX_OPENPMD=ON \ -DWarpX_PRECISION=SINGLE \ - -DWarpX_FFT=ON \ - -DWarpX_HEFFTE=ON + -DWarpX_FFT=ON cmake --build build_sp -j 4 export WARPX_MPI=OFF @@ -122,8 +121,7 @@ jobs: -DWarpX_MPI=ON \ -DWarpX_OPENPMD=ON \ -DWarpX_PRECISION=DOUBLE \ - -DWarpX_FFT=ON \ - -DWarpX_HEFFTE=ON + -DWarpX_FFT=ON cmake --build build_2d -j 4 export WARPX_MPI=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ff14bacfa6..da62c943e19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,6 @@ option(WarpX_LIB "Build WarpX as a 
library" OFF) option(WarpX_MPI "Multi-node support (message-passing)" ON) option(WarpX_OPENPMD "openPMD I/O (HDF5, ADIOS)" ON) option(WarpX_FFT "FFT-based solvers" OFF) -option(WarpX_HEFFTE "Multi-node FFT-based solvers" OFF) option(WarpX_PYTHON "Python bindings" OFF) option(WarpX_SENSEI "SENSEI in situ diagnostics" OFF) option(WarpX_QED "QED support (requires PICSAR)" ON) @@ -146,10 +145,6 @@ mark_as_advanced(WarpX_MPI_THREAD_MULTIPLE) option(WarpX_amrex_internal "Download & build AMReX" ON) -if(WarpX_HEFFTE AND NOT WarpX_MPI) - message(FATAL_ERROR "WarpX_HEFFTE (${WarpX_HEFFTE}) can only be used if WarpX_MPI is ON.") -endif() - # change the default build type to Release (or RelWithDebInfo) instead of Debug set_default_build_type("Release") @@ -197,10 +192,6 @@ option(ABLASTR_FFT "compile AnyFFT wrappers" ${WarpX_FFT}) if(WarpX_FFT) set(ABLASTR_FFT ON CACHE STRING "FFT-based solvers" FORCE) endif() -option(ABLASTR_HEFFTE "compile AnyFFT wrappers" ${WarpX_HEFFTE}) -if(WarpX_HEFFTE) - set(ABLASTR_HEFFTE ON CACHE STRING "Multi-Node FFT-based solvers" FORCE) -endif() # this defined the variable BUILD_TESTING which is ON by default include(CTest) @@ -242,23 +233,6 @@ if(WarpX_FFT) endif() endif() -# multi-node FFT -if(WarpX_HEFFTE) - if(WarpX_COMPUTE STREQUAL CUDA) - set(_heFFTe_COMPS CUDA) - elseif(WarpX_COMPUTE STREQUAL HIP) - set(_heFFTe_COMPS ROCM) - elseif(WarpX_COMPUTE STREQUAL SYCL) - set(_heFFTe_COMPS ONEAPI) - else() # NOACC, OMP - set(_heFFTe_COMPS FFTW) # or MKL - endif() - # note: we could also enforce GPUAWARE for CUDA and HIP, which can still be - # disabled at runtime - - find_package(Heffte REQUIRED COMPONENTS ${_heFFTe_COMPS}) -endif() - # Python if(WarpX_PYTHON) find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED) @@ -499,10 +473,6 @@ foreach(D IN LISTS WarpX_DIMS) endif() endif() - if(ABLASTR_HEFFTE) - target_link_libraries(ablastr_${SD} PUBLIC Heffte::Heffte) - endif() - if(WarpX_PYTHON) target_link_libraries(pyWarpX_${SD} 
PRIVATE pybind11::module pybind11::windows_extras) if(WarpX_PYTHON_IPO) @@ -593,13 +563,6 @@ foreach(D IN LISTS WarpX_DIMS) target_compile_definitions(ablastr_${SD} PUBLIC ABLASTR_USE_FFT) endif() - if(WarpX_HEFFTE) - target_compile_definitions(ablastr_${SD} PUBLIC WARPX_USE_HEFFTE) - endif() - if(ABLASTR_HEFFTE) - target_compile_definitions(ablastr_${SD} PUBLIC ABLASTR_USE_HEFFTE) - endif() - if(WarpX_PYTHON AND pyWarpX_VERSION_INFO) # for module __version__ target_compile_definitions(pyWarpX_${SD} PRIVATE diff --git a/Docs/source/install/cmake.rst b/Docs/source/install/cmake.rst index 41e4c40bc85..f3f881d4504 100644 --- a/Docs/source/install/cmake.rst +++ b/Docs/source/install/cmake.rst @@ -97,7 +97,6 @@ CMake Option Default & Values Descr ``WarpX_PRECISION`` SINGLE/**DOUBLE** Floating point precision (single/double) ``WarpX_PARTICLE_PRECISION`` SINGLE/**DOUBLE** Particle floating point precision (single/double), defaults to WarpX_PRECISION value if not set ``WarpX_FFT`` ON/**OFF** FFT-based solvers -``WarpX_HEFFTE`` ON/**OFF** Multi-Node FFT-based solvers ``WarpX_PYTHON`` ON/**OFF** Python bindings ``WarpX_QED`` **ON**/OFF QED support (requires PICSAR) ``WarpX_QED_TABLE_GEN`` ON/**OFF** QED table generation support (requires PICSAR and Boost) @@ -275,7 +274,6 @@ Environment Variable Default & Values Descr ``WARPX_PRECISION`` SINGLE/**DOUBLE** Floating point precision (single/double) ``WARPX_PARTICLE_PRECISION`` SINGLE/**DOUBLE** Particle floating point precision (single/double), defaults to WarpX_PRECISION value if not set ``WARPX_FFT`` ON/**OFF** FFT-based solvers -``WARPX_HEFFTE`` ON/**OFF** Multi-Node FFT-based solvers ``WARPX_QED`` **ON**/OFF PICSAR QED (requires PICSAR) ``WARPX_QED_TABLE_GEN`` ON/**OFF** QED table generation (requires PICSAR and Boost) ``BUILD_PARALLEL`` ``2`` Number of threads to use for parallel builds diff --git a/Docs/source/install/dependencies.rst b/Docs/source/install/dependencies.rst index 71a607eae6a..13e2377d568 100644 --- 
a/Docs/source/install/dependencies.rst +++ b/Docs/source/install/dependencies.rst @@ -28,7 +28,6 @@ Optional dependencies include: - `FFTW3 `__: for spectral solver (PSATD or IGF) support when running on CPU or SYCL - also needs the ``pkg-config`` tool on Unix -- `heFFTe 2.4.0+ `__: for multi-node spectral solver (IGF) support - `BLAS++ `__ and `LAPACK++ `__: for spectral solver (PSATD) support in RZ geometry - `Boost 1.66.0+ `__: for QED lookup tables generation support - `openPMD-api 0.15.1+ `__: we automatically download and compile a copy of openPMD-api for openPMD I/O support @@ -81,7 +80,7 @@ Conda (Linux/macOS/Windows) .. code-block:: bash - conda create -n warpx-cpu-mpich-dev -c conda-forge blaspp boost ccache cmake compilers git "heffte=*=mpi_mpich*" lapackpp "openpmd-api=*=mpi_mpich*" openpmd-viewer python make numpy pandas scipy yt "fftw=*=mpi_mpich*" pkg-config matplotlib mamba mpich mpi4py ninja pip virtualenv + conda create -n warpx-cpu-mpich-dev -c conda-forge blaspp boost ccache cmake compilers git lapackpp "openpmd-api=*=mpi_mpich*" openpmd-viewer python make numpy pandas scipy yt "fftw=*=mpi_mpich*" pkg-config matplotlib mamba mpich mpi4py ninja pip virtualenv conda activate warpx-cpu-mpich-dev # compile WarpX with -DWarpX_MPI=ON diff --git a/Examples/Tests/open_bc_poisson_solver/CMakeLists.txt b/Examples/Tests/open_bc_poisson_solver/CMakeLists.txt index d6141f0b4ab..c5ec4583da1 100644 --- a/Examples/Tests/open_bc_poisson_solver/CMakeLists.txt +++ b/Examples/Tests/open_bc_poisson_solver/CMakeLists.txt @@ -12,15 +12,3 @@ if(WarpX_FFT) OFF # dependency ) endif() - -if(WarpX_HEFFTE) - add_warpx_test( - test_3d_open_bc_poisson_solver_heffte # name - 3 # dims - 2 # nprocs - inputs_test_3d_open_bc_poisson_solver_heffte # inputs - analysis.py # analysis - diags/diag1000001 # output - OFF # dependency - ) -endif() diff --git a/Examples/Tests/open_bc_poisson_solver/inputs_test_3d_open_bc_poisson_solver_heffte 
b/Examples/Tests/open_bc_poisson_solver/inputs_test_3d_open_bc_poisson_solver_heffte deleted file mode 100644 index 4f0a50df037..00000000000 --- a/Examples/Tests/open_bc_poisson_solver/inputs_test_3d_open_bc_poisson_solver_heffte +++ /dev/null @@ -1 +0,0 @@ -FILE = inputs_test_3d_open_bc_poisson_solver diff --git a/GNUmakefile b/GNUmakefile index 1cc78403c7b..6298dd83369 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -38,7 +38,6 @@ USE_OPENPMD = FALSE WarpxBinDir = Bin USE_FFT = FALSE -USE_HEFFTE = FALSE USE_RZ = FALSE USE_EB = FALSE diff --git a/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp b/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp index 546326d7fe0..6cfb8328bdd 100644 --- a/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp +++ b/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -18,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -25,13 +25,9 @@ #include #include #include +#include #include -#if defined(ABLASTR_USE_FFT) && defined(ABLASTR_USE_HEFFTE) -#include -#endif - - namespace ablastr::fields { void @@ -42,10 +38,6 @@ computePhiIGF ( amrex::MultiFab const & rho, { using namespace amrex::literals; - BL_PROFILE_VAR_NS("ablastr::fields::computePhiIGF: FFTs", timer_ffts); - BL_PROFILE_VAR_NS("ablastr::fields::computePhiIGF: FFT plans", timer_plans); - BL_PROFILE_VAR_NS("ablastr::fields::computePhiIGF: parallel copies", timer_pcopies); - BL_PROFILE("ablastr::fields::computePhiIGF"); // Define box that encompasses the full domain @@ -53,240 +45,47 @@ computePhiIGF ( amrex::MultiFab const & rho, domain.surroundingNodes(); // get nodal points, since `phi` and `rho` are nodal domain.grow( phi.nGrowVect() ); // include guard cells - int const nx = domain.length(0); - int const ny = domain.length(1); - int const nz = domain.length(2); - - // Allocate 2x wider arrays for the convolution of rho with the Green function - amrex::Box const 
realspace_box = amrex::Box( - {domain.smallEnd(0), domain.smallEnd(1), domain.smallEnd(2)}, - {2*nx-1+domain.smallEnd(0), 2*ny-1+domain.smallEnd(1), 2*nz-1+domain.smallEnd(2)}, - amrex::IntVect::TheNodeVector() ); + // Do we grow the domain in the z-direction in the 2D mode? + bool const do_2d_fft = false; -#if !defined(ABLASTR_USE_HEFFTE) - // Without distributed FFTs (i.e. without heFFTe): - // allocate the 2x wider array on a single box - amrex::BoxArray const realspace_ba = amrex::BoxArray( realspace_box ); - // Define a distribution mapping for the global FFT, with only one box - amrex::DistributionMapping dm_global_fft; - dm_global_fft.define( realspace_ba ); -#elif defined(ABLASTR_USE_HEFFTE) - // With distributed FFTs (i.e. with heFFTe): - // Define a new distribution mapping which is decomposed purely along z - // and has one box per MPI rank - int const nprocs = amrex::ParallelDescriptor::NProcs(); - amrex::BoxArray realspace_ba; - amrex::DistributionMapping dm_global_fft; + // Specify the number of processes for FFT. Can be any positive number + // including 1. + int nprocs = amrex::ParallelDescriptor::NProcs(); { - int realspace_nx = realspace_box.length(0); - int realspace_ny = realspace_box.length(1); - int realspace_nz = realspace_box.length(2); - int minsize_z = realspace_nz / nprocs; - int nleft_z = realspace_nz - minsize_z*nprocs; - - AMREX_ALWAYS_ASSERT(realspace_nz >= nprocs); - // We are going to split realspace_box in such a way that the first - // nleft boxes has minsize_z+1 nodes and the others minsize - // nodes. We do it this way instead of BoxArray::maxSize to make - // sure there are exactly nprocs boxes and there are no overlaps. 
- amrex::BoxList bl(amrex::IndexType::TheNodeType()); - for (int iproc = 0; iproc < nprocs; ++iproc) { - int zlo, zhi; - if (iproc < nleft_z) { - zlo = iproc*(minsize_z+1); - zhi = zlo + minsize_z; - - } else { - zlo = iproc*minsize_z + nleft_z; - zhi = zlo + minsize_z - 1; - - } - amrex::Box tbx(amrex::IntVect(0,0,zlo),amrex::IntVect(realspace_nx-1,realspace_ny-1,zhi),amrex::IntVect(1)); - - tbx.shift(realspace_box.smallEnd()); - bl.push_back(tbx); - } - realspace_ba.define(std::move(bl)); - amrex::Vector pmap(nprocs); - std::iota(pmap.begin(), pmap.end(), 0); - dm_global_fft.define(std::move(pmap)); + amrex::ParmParse const pp("ablastr"); + bool do_serial_fft = false; + pp.query("do_serial_fft", do_serial_fft); + if (do_serial_fft) { nprocs = 1; }; } -#endif - - // Allocate required arrays - amrex::MultiFab tmp_rho = amrex::MultiFab(realspace_ba, dm_global_fft, 1, 0); - tmp_rho.setVal(0); - amrex::MultiFab tmp_G = amrex::MultiFab(realspace_ba, dm_global_fft, 1, 0); - tmp_G.setVal(0); - - BL_PROFILE_VAR_START(timer_pcopies); - // Copy from rho to tmp_rho - tmp_rho.ParallelCopy( rho, 0, 0, 1, amrex::IntVect::TheZeroVector(), amrex::IntVect::TheZeroVector() ); - BL_PROFILE_VAR_STOP(timer_pcopies); - -#if !defined(ABLASTR_USE_HEFFTE) - // Without distributed FFTs (i.e. without heFFTe): - // We loop over the original box (not the 2x wider one), and the other quadrants by periodicity - amrex::BoxArray const& igf_compute_box = amrex::BoxArray( domain ); -#else - // With distributed FFTs (i.e. 
with heFFTe): - // We loop over the full 2x wider box, since 1 MPI rank does not necessarily own the data for the other quadrants - amrex::BoxArray const& igf_compute_box = tmp_G.boxArray(); -#endif - - // Compute the integrated Green function -#ifdef AMREX_USE_OMP -#pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) -#endif - for (amrex::MFIter mfi(igf_compute_box, dm_global_fft, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi) { - - amrex::Box const bx = mfi.tilebox(); - amrex::IntVect const lo = realspace_box.smallEnd(); - amrex::IntVect const hi = realspace_box.bigEnd(); - - // Fill values of the Green function - amrex::Real const dx = cell_size[0]; - amrex::Real const dy = cell_size[1]; - amrex::Real const dz = cell_size[2]; - - amrex::Array4 const tmp_G_arr = tmp_G.array(mfi); - amrex::ParallelFor( bx, - [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept - { - int const i0 = i - lo[0]; - int const j0 = j - lo[1]; - int const k0 = k - lo[2]; - amrex::Real const x = i0*dx; - amrex::Real const y = j0*dy; - amrex::Real const z = k0*dz; - -#if !defined(ABLASTR_USE_HEFFTE) - // Without distributed FFTs (i.e. without heFFTe): - amrex::Real const G_value = SumOfIntegratedPotential(x , y , z , dx, dy, dz); - tmp_G_arr(i,j,k) = G_value; - // Fill the rest of the array by periodicity - if (i0>0) {tmp_G_arr(hi[0]+1-i0, j , k ) = G_value;} - if (j0>0) {tmp_G_arr(i , hi[1]+1-j0, k ) = G_value;} - if (k0>0) {tmp_G_arr(i , j , hi[2]+1-k0) = G_value;} - if ((i0>0)&&(j0>0)) {tmp_G_arr(hi[0]+1-i0, hi[1]+1-j0, k ) = G_value;} - if ((j0>0)&&(k0>0)) {tmp_G_arr(i , hi[1]+1-j0, hi[2]+1-k0) = G_value;} - if ((i0>0)&&(k0>0)) {tmp_G_arr(hi[0]+1-i0, j , hi[2]+1-k0) = G_value;} - if ((i0>0)&&(j0>0)&&(k0>0)) {tmp_G_arr(hi[0]+1-i0, hi[1]+1-j0, hi[2]+1-k0) = G_value;} -#else - // With distributed FFTs (i.e. 
with heFFTe): - amrex::Real x_hi = dx*(hi[0]+2); - amrex::Real y_hi = dy*(hi[1]+2); - amrex::Real z_hi = dz*(hi[2]+2); - if ((i0< nx)&&(j0< ny)&&(k0< nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x , y , z , dx, dy, dz); } - if ((i0< nx)&&(j0> ny)&&(k0< nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x , y_hi-y, z , dx, dy, dz); } - if ((i0< nx)&&(j0< ny)&&(k0> nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x , y , z_hi-z, dx, dy, dz); } - if ((i0> nx)&&(j0> ny)&&(k0< nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x_hi-x, y_hi-y, z , dx, dy, dz); } - if ((i0< nx)&&(j0> ny)&&(k0> nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x , y_hi-y, z_hi-z, dx, dy, dz); } - if ((i0> nx)&&(j0< ny)&&(k0> nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x_hi-x, y , z_hi-z, dx, dy, dz); } - if ((i0> nx)&&(j0> ny)&&(k0> nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x_hi-x, y_hi-y, z_hi-z, dx, dy, dz); } - if ((i0> nx)&&(j0< ny)&&(k0< nz)) { tmp_G_arr(i,j,k) = SumOfIntegratedPotential(x_hi-x, y , z , dx, dy, dz); } -#endif - } - ); + static std::unique_ptr> obc_solver; + if (!obc_solver) { + amrex::ExecOnFinalize([&] () { obc_solver.reset(); }); } - - // Prepare to perform global FFT - // Since there is 1 MPI rank per box, here each MPI rank obtains its local box and the associated boxid - const int local_boxid = amrex::ParallelDescriptor::MyProc(); // because of how we made the DistributionMapping - if (local_boxid < realspace_ba.size()) { - // When not using heFFTe, there is only one box (the global box) - // It is taken care of my MPI rank 0 ; other ranks have no work (hence the if condition) - - const amrex::Box local_nodal_box = realspace_ba[local_boxid]; - amrex::Box local_box(local_nodal_box.smallEnd(), local_nodal_box.bigEnd()); - local_box.shift(-realspace_box.smallEnd()); // This simplifies the setup because the global lo is zero now - // Since we the domain decompostion is in the z-direction, setting up c_local_box is simple. 
- amrex::Box c_local_box = local_box; - c_local_box.setBig(0, local_box.length(0)/2+1); - - // Allocate array in spectral space - using SpectralField = amrex::BaseFab< amrex::GpuComplex< amrex::Real > > ; - SpectralField tmp_rho_fft(c_local_box, 1, amrex::The_Device_Arena()); - SpectralField tmp_G_fft(c_local_box, 1, amrex::The_Device_Arena()); - tmp_rho_fft.shift(realspace_box.smallEnd()); - tmp_G_fft.shift(realspace_box.smallEnd()); - - // Create FFT plans - BL_PROFILE_VAR_START(timer_plans); -#if !defined(ABLASTR_USE_HEFFTE) - const amrex::IntVect fft_size = realspace_ba[local_boxid].length(); - ablastr::math::anyfft::FFTplan forward_plan_rho = ablastr::math::anyfft::CreatePlan( - fft_size, tmp_rho[local_boxid].dataPtr(), - reinterpret_cast(tmp_rho_fft.dataPtr()), - ablastr::math::anyfft::direction::R2C, AMREX_SPACEDIM); - ablastr::math::anyfft::FFTplan forward_plan_G = ablastr::math::anyfft::CreatePlan( - fft_size, tmp_G[local_boxid].dataPtr(), - reinterpret_cast(tmp_G_fft.dataPtr()), - ablastr::math::anyfft::direction::R2C, AMREX_SPACEDIM); - ablastr::math::anyfft::FFTplan backward_plan = ablastr::math::anyfft::CreatePlan( - fft_size, tmp_G[local_boxid].dataPtr(), - reinterpret_cast( tmp_G_fft.dataPtr()), - ablastr::math::anyfft::direction::C2R, AMREX_SPACEDIM); -#elif defined(ABLASTR_USE_HEFFTE) -#if defined(AMREX_USE_CUDA) - heffte::fft3d_r2c fft -#elif defined(AMREX_USE_HIP) - heffte::fft3d_r2c fft -#else - heffte::fft3d_r2c fft -#endif - ({{local_box.smallEnd(0), local_box.smallEnd(1), local_box.smallEnd(2)}, - {local_box.bigEnd(0), local_box.bigEnd(1), local_box.bigEnd(2)}}, - {{c_local_box.smallEnd(0), c_local_box.smallEnd(1), c_local_box.smallEnd(2)}, - {c_local_box.bigEnd(0), c_local_box.bigEnd(1), c_local_box.bigEnd(2)}}, - 0, amrex::ParallelDescriptor::Communicator()); - using heffte_complex = typename heffte::fft_output::type; - heffte_complex* rho_fft_data = (heffte_complex*) tmp_rho_fft.dataPtr(); - heffte_complex* G_fft_data = (heffte_complex*) 
tmp_G_fft.dataPtr(); -#endif - BL_PROFILE_VAR_STOP(timer_plans); - - // Perform forward FFTs - BL_PROFILE_VAR_START(timer_ffts); -#if !defined(ABLASTR_USE_HEFFTE) - ablastr::math::anyfft::Execute(forward_plan_rho); - ablastr::math::anyfft::Execute(forward_plan_G); -#elif defined(ABLASTR_USE_HEFFTE) - fft.forward(tmp_rho[local_boxid].dataPtr(), rho_fft_data); - fft.forward(tmp_G[local_boxid].dataPtr(), G_fft_data); -#endif - BL_PROFILE_VAR_STOP(timer_ffts); - - // Multiply tmp_G_fft and tmp_rho_fft in spectral space - // Store the result in-place in Gtmp_G_fft, to save memory - tmp_G_fft.template mult(tmp_rho_fft, 0, 0, 1); - amrex::Gpu::streamSynchronize(); - - // Perform backward FFT - BL_PROFILE_VAR_START(timer_ffts); -#if !defined(ABLASTR_USE_HEFFTE) - ablastr::math::anyfft::Execute(backward_plan); -#elif defined(ABLASTR_USE_HEFFTE) - fft.backward(G_fft_data, tmp_G[local_boxid].dataPtr()); -#endif - BL_PROFILE_VAR_STOP(timer_ffts); - -#if !defined(ABLASTR_USE_HEFFTE) - // Loop to destroy FFT plans - ablastr::math::anyfft::DestroyPlan(forward_plan_G); - ablastr::math::anyfft::DestroyPlan(forward_plan_rho); - ablastr::math::anyfft::DestroyPlan(backward_plan); -#endif + if (!obc_solver || obc_solver->Domain() != domain) { + amrex::FFT::Info info{}; + if (do_2d_fft) { info.setBatchMode(true); } + info.setNumProcs(nprocs); + obc_solver = std::make_unique>(domain, info); } - // Normalize, since (FFT + inverse FFT) results in a factor N - const amrex::Real normalization = 1._rt / realspace_box.numPts(); - tmp_G.mult( normalization ); - - BL_PROFILE_VAR_START(timer_pcopies); - // Copy from tmp_G to phi - phi.ParallelCopy( tmp_G, 0, 0, 1, amrex::IntVect::TheZeroVector(), phi.nGrowVect()); - BL_PROFILE_VAR_STOP(timer_pcopies); + auto const& lo = domain.smallEnd(); + amrex::Real const dx = cell_size[0]; + amrex::Real const dy = cell_size[1]; + amrex::Real const dz = cell_size[2]; + + obc_solver->setGreensFunction( + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> amrex::Real 
+ { + int const i0 = i - lo[0]; + int const j0 = j - lo[1]; + int const k0 = k - lo[2]; + amrex::Real const x = i0*dx; + amrex::Real const y = j0*dy; + amrex::Real const z = k0*dz; + return SumOfIntegratedPotential(x, y, z, dx, dy, dz); + }); + + obc_solver->solve(phi, rho); } } // namespace ablastr::fields diff --git a/Tools/machines/desktop/spack-macos-openmp.yaml b/Tools/machines/desktop/spack-macos-openmp.yaml index 3ea78625b78..820cf7069fd 100644 --- a/Tools/machines/desktop/spack-macos-openmp.yaml +++ b/Tools/machines/desktop/spack-macos-openmp.yaml @@ -23,7 +23,6 @@ spack: - conduit ~fortran - fftw - hdf5 ~fortran - - heffte ~cuda +fftw - lapackpp ~cuda ~rocm ^blaspp ~cuda +openmp ~rocm - mpi - llvm-openmp diff --git a/Tools/machines/desktop/spack-ubuntu-cuda.yaml b/Tools/machines/desktop/spack-ubuntu-cuda.yaml index 19b9ae12e24..08d0c95ee4b 100644 --- a/Tools/machines/desktop/spack-ubuntu-cuda.yaml +++ b/Tools/machines/desktop/spack-ubuntu-cuda.yaml @@ -25,7 +25,6 @@ spack: - cuda - fftw - hdf5 - - heffte - lapackpp - mpi - pkgconfig diff --git a/Tools/machines/desktop/spack-ubuntu-openmp.yaml b/Tools/machines/desktop/spack-ubuntu-openmp.yaml index 1eb7d4074a7..b658f1e009d 100644 --- a/Tools/machines/desktop/spack-ubuntu-openmp.yaml +++ b/Tools/machines/desktop/spack-ubuntu-openmp.yaml @@ -22,7 +22,6 @@ spack: - ecp-data-vis-sdk +adios2 +ascent +hdf5 +sensei - fftw - hdf5 - - heffte ~cuda +fftw - lapackpp ~cuda ~rocm ^blaspp ~cuda +openmp ~rocm - mpi - pkgconfig diff --git a/Tools/machines/desktop/spack-ubuntu-rocm.yaml b/Tools/machines/desktop/spack-ubuntu-rocm.yaml index 7eee1baa13c..45c9b0f776e 100644 --- a/Tools/machines/desktop/spack-ubuntu-rocm.yaml +++ b/Tools/machines/desktop/spack-ubuntu-rocm.yaml @@ -21,7 +21,6 @@ spack: - cmake - ecp-data-vis-sdk +adios2 +ascent +hdf5 +sensei - hdf5 - - heffte - hip - lapackpp - llvm-amdgpu diff --git a/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh 
b/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh index cd29664a978..fd3a2d3f756 100755 --- a/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh +++ b/Tools/machines/lonestar6-tacc/install_a100_dependencies.sh @@ -96,45 +96,6 @@ CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B ${build_dir}/lap cmake --build ${build_dir}/lapackpp-a100-build --target install --parallel 16 rm -rf ${build_dir}/lapackpp-a100-build -# heFFTe -if [ -d $HOME/src/heffte ] -then - cd $HOME/src/heffte - git fetch --prune - git checkout v2.4.0 - cd - -else - git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${HOME}/src/heffte -fi -rm -rf ${HOME}/src/heffte-a100-build -cmake \ - -S ${HOME}/src/heffte \ - -B ${build_dir}/heffte-a100-build \ - -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ - -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0 \ - -DHeffte_DISABLE_GPU_AWARE_MPI=OFF \ - -DHeffte_ENABLE_AVX=OFF \ - -DHeffte_ENABLE_AVX512=OFF \ - -DHeffte_ENABLE_FFTW=OFF \ - -DHeffte_ENABLE_CUDA=ON \ - -DHeffte_ENABLE_ROCM=OFF \ - -DHeffte_ENABLE_ONEAPI=OFF \ - -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_SEQUENTIAL_TESTING=OFF \ - -DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_TRACING=OFF \ - -DHeffte_ENABLE_PYTHON=OFF \ - -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_SWIG=OFF \ - -DHeffte_ENABLE_MAGMA=OFF -cmake --build ${build_dir}/heffte-a100-build --target install --parallel 16 -rm -rf ${build_dir}/heffte-a100-build - - # Python ###################################################################### # python3 -m pip install --upgrade pip diff --git a/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example b/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example index 148299f281c..57c98da9b4a 100644 --- a/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example +++ 
b/Tools/machines/lonestar6-tacc/lonestar6_warpx_a100.profile.example @@ -20,13 +20,11 @@ export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:${CMAKE_PREFIX_PATH} export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:${CMAKE_PREFIX_PATH} export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-2024.05.31:${CMAKE_PREFIX_PATH} export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-2024.05.31:${CMAKE_PREFIX_PATH} -export CMAKE_PREFIX_PATH=${SW_DIR}/heffte-2.4.0:${CMAKE_PREFIX_PATH} export LD_LIBRARY_PATH=${SW_DIR}/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${SW_DIR}/adios2-2.8.3/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${SW_DIR}/blaspp-2024.05.31/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-2024.05.31/lib64:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=${SW_DIR}/heffte-2.4.0/lib64:$LD_LIBRARY_PATH export PATH=${SW_DIR}/adios2-2.8.3/bin:${PATH} diff --git a/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh b/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh index 437300b8303..7608cb3f666 100755 --- a/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh +++ b/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh @@ -107,45 +107,6 @@ CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B cmake --build ${build_dir}/lapackpp-pm-cpu-build --target install --parallel 16 rm -rf ${build_dir}/lapackpp-pm-cpu-build -# heFFTe -if [ -d $HOME/src/heffte ] -then - cd $HOME/src/heffte - git fetch --prune - git checkout v2.4.0 - cd - -else - git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${HOME}/src/heffte -fi -rm -rf ${HOME}/src/heffte-pm-cpu-build -cmake \ - -S ${HOME}/src/heffte \ - -B ${build_dir}/heffte-pm-cpu-build \ - -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ - -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0 \ - -DHeffte_DISABLE_GPU_AWARE_MPI=ON \ - -DHeffte_ENABLE_AVX=ON \ - -DHeffte_ENABLE_AVX512=OFF \ - 
-DHeffte_ENABLE_FFTW=ON \ - -DHeffte_ENABLE_CUDA=OFF \ - -DHeffte_ENABLE_ROCM=OFF \ - -DHeffte_ENABLE_ONEAPI=OFF \ - -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_SEQUENTIAL_TESTING=OFF \ - -DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_TRACING=OFF \ - -DHeffte_ENABLE_PYTHON=OFF \ - -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_SWIG=OFF \ - -DHeffte_ENABLE_MAGMA=OFF -cmake --build ${build_dir}/heffte-pm-cpu-build --target install --parallel 16 -rm -rf ${build_dir}/heffte-pm-cpu-build - - # Python ###################################################################### # python3 -m pip install --upgrade pip diff --git a/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh b/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh index c77f075a3a8..d08ca7457d4 100755 --- a/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh +++ b/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh @@ -107,49 +107,6 @@ CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B cmake --build ${build_dir}/lapackpp-pm-gpu-build --target install --parallel 16 rm -rf ${build_dir}/lapackpp-pm-gpu-build -# heFFTe -if [ -d $HOME/src/heffte ] -then - cd $HOME/src/heffte - git fetch --prune - git checkout v2.4.0 - cd - -else - git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${HOME}/src/heffte -fi -rm -rf ${HOME}/src/heffte-pm-gpu-build -cmake \ - -S ${HOME}/src/heffte \ - -B ${build_dir}/heffte-pm-gpu-build \ - -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ - -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0 \ - -DHeffte_DISABLE_GPU_AWARE_MPI=OFF \ - -DHeffte_ENABLE_AVX=OFF \ - -DHeffte_ENABLE_AVX512=OFF \ - -DHeffte_ENABLE_FFTW=OFF \ - -DHeffte_ENABLE_CUDA=ON \ - -DHeffte_ENABLE_ROCM=OFF \ - -DHeffte_ENABLE_ONEAPI=OFF \ - -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_SEQUENTIAL_TESTING=OFF \ - 
-DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_TRACING=OFF \ - -DHeffte_ENABLE_PYTHON=OFF \ - -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_SWIG=OFF \ - -DHeffte_ENABLE_MAGMA=OFF -cmake --build ${build_dir}/heffte-pm-gpu-build --target install --parallel 16 -rm -rf ${build_dir}/heffte-pm-gpu-build - -# work-around for heFFTe 2.4.0 bug with NVCC -# https://github.com/icl-utk-edu/heffte/pull/54 -sed -i 's/__AVX__/NOTDEFINED_DONOTUSE/g' ${SW_DIR}/heffte-2.4.0/include/stock_fft/heffte_stock_vec_types.h - - # Python ###################################################################### # python3 -m pip install --upgrade pip diff --git a/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example b/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example index 94d598abf5b..99817924ad6 100644 --- a/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example +++ b/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example @@ -19,13 +19,11 @@ export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/c-blosc-1.21.1 export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/adios2-2.8.3:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/blaspp-2024.05.31:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/lapackpp-2024.05.31:$CMAKE_PREFIX_PATH -export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/heffte-2.4.0:$CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/blaspp-2024.05.31/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/lapackpp-2024.05.31/lib64:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/heffte-2.4.0/lib64:$LD_LIBRARY_PATH export 
PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/adios2-2.8.3/bin:${PATH} diff --git a/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example b/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example index da1d55964d1..1e5325e29b9 100644 --- a/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example +++ b/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example @@ -23,13 +23,11 @@ export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/c-blosc-1.2 export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/blaspp-2024.05.31:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/lapackpp-2024.05.31:$CMAKE_PREFIX_PATH -export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/heffte-2.4.0:$CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/blaspp-2024.05.31/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/lapackpp-2024.05.31/lib64:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/heffte-2.4.0/lib64:$LD_LIBRARY_PATH export PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/adios2-2.8.3/bin:${PATH} diff --git a/Tools/machines/tioga-llnl/install_mi300a_dependencies.sh b/Tools/machines/tioga-llnl/install_mi300a_dependencies.sh index 7e002838e4a..95633549698 100644 --- a/Tools/machines/tioga-llnl/install_mi300a_dependencies.sh +++ b/Tools/machines/tioga-llnl/install_mi300a_dependencies.sh @@ -143,48 +143,6 @@ cmake \ --parallel ${build_procs} rm -rf ${build_dir}/lapackpp-tioga-mi300a-build -# heFFTe -if [ -d ${SRC_DIR}/heffte ] -then - cd 
${SRC_DIR}/heffte - git fetch --prune - git checkout v2.4.0 - cd - -else - git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${SRC_DIR}/heffte -fi -cmake \ - --fresh \ - -S ${SRC_DIR}/heffte \ - -B ${build_dir}/heffte-build \ - -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ - -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0 \ - -DHeffte_DISABLE_GPU_AWARE_MPI=OFF \ - -DHeffte_ENABLE_AVX=OFF \ - -DHeffte_ENABLE_AVX512=OFF \ - -DHeffte_ENABLE_FFTW=OFF \ - -DHeffte_ENABLE_CUDA=OFF \ - -DHeffte_ENABLE_ROCM=ON \ - -DHeffte_ENABLE_ONEAPI=OFF \ - -DHeffte_ENABLE_MKL=OFF \ - -DHeffte_ENABLE_DOXYGEN=OFF \ - -DHeffte_SEQUENTIAL_TESTING=OFF \ - -DHeffte_ENABLE_TESTING=OFF \ - -DHeffte_ENABLE_TRACING=OFF \ - -DHeffte_ENABLE_PYTHON=OFF \ - -DHeffte_ENABLE_FORTRAN=OFF \ - -DHeffte_ENABLE_SWIG=OFF \ - -DHeffte_ENABLE_MAGMA=OFF -cmake \ - --build ${build_dir}/heffte-build \ - --target install \ - --parallel ${build_procs} -rm -rf ${build_dir}/heffte-build - - # Python ###################################################################### # # sometimes, the Lassen PIP Index is down diff --git a/Tools/machines/tioga-llnl/tioga_mi300a_warpx.profile.example b/Tools/machines/tioga-llnl/tioga_mi300a_warpx.profile.example index e3da37c5522..53fe21844c1 100644 --- a/Tools/machines/tioga-llnl/tioga_mi300a_warpx.profile.example +++ b/Tools/machines/tioga-llnl/tioga_mi300a_warpx.profile.example @@ -31,13 +31,11 @@ export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-2.15.1:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.10.1:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-2024.05.31:$CMAKE_PREFIX_PATH export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-2024.05.31:$CMAKE_PREFIX_PATH -export CMAKE_PREFIX_PATH=${SW_DIR}/heffte-2.4.0:$CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${SW_DIR}/c-blosc-2.15.1/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${SW_DIR}/adios2-2.10.1/lib64:$LD_LIBRARY_PATH export 
LD_LIBRARY_PATH=${SW_DIR}/blaspp-2024.05.31/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-2024.05.31/lib64:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=${SW_DIR}/heffte-2.4.0/lib64:$LD_LIBRARY_PATH export PATH=${SW_DIR}/adios2-2.10.1/bin:${PATH} diff --git a/cmake/WarpXFunctions.cmake b/cmake/WarpXFunctions.cmake index 43efd89efc5..543d0cd0ce4 100644 --- a/cmake/WarpXFunctions.cmake +++ b/cmake/WarpXFunctions.cmake @@ -313,10 +313,6 @@ function(set_warpx_binary_name D) set_property(TARGET ${tgt} APPEND_STRING PROPERTY OUTPUT_NAME ".FFT") endif() - if(WarpX_HEFFTE) - set_property(TARGET ${tgt} APPEND_STRING PROPERTY OUTPUT_NAME ".HEFFTE") - endif() - if(WarpX_EB) set_property(TARGET ${tgt} APPEND_STRING PROPERTY OUTPUT_NAME ".EB") endif() @@ -462,7 +458,6 @@ function(warpx_print_summary) message(" PARTICLE PRECISION: ${WarpX_PARTICLE_PRECISION}") message(" PRECISION: ${WarpX_PRECISION}") message(" FFT Solvers: ${WarpX_FFT}") - message(" heFFTe: ${WarpX_HEFFTE}") message(" PYTHON: ${WarpX_PYTHON}") if(WarpX_PYTHON) message(" PYTHON IPO: ${WarpX_PYTHON_IPO}") diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index e1072d03014..d18d787b000 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -51,6 +51,12 @@ macro(find_amrex) set(AMReX_OMP OFF CACHE INTERNAL "") endif() + if(WarpX_FFT) + set(AMReX_FFT ON CACHE INTERNAL "") + else() + set(AMReX_FFT OFF CACHE INTERNAL "") + endif() + if(WarpX_EB) set(AMReX_EB ON CACHE INTERNAL "") else() @@ -243,6 +249,11 @@ macro(find_amrex) foreach(D IN LISTS WarpX_amrex_dim) set(COMPONENT_DIMS ${COMPONENT_DIMS} ${D}D) endforeach() + if(WarpX_FFT) + set(COMPONENT_FFT FFT) + else() + set(COMPONENT_FFT) + endif() if(WarpX_EB) set(COMPONENT_EB EB) else() @@ -260,7 +271,7 @@ macro(find_amrex) endif() set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION}) - find_package(AMReX 24.11 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_CATALYST} 
${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) + find_package(AMReX 294b6fee6f0c7f44693eac14e6b0c0702ecfd791 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_CATALYST} ${COMPONENT_DIMS} ${COMPONENT_EB} ${COMPONENT_FFT} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) # note: TINYP skipped because user-configured and optional # AMReX CMake helper scripts @@ -283,7 +294,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "4b703fec6c2ff983e465c8cef0cc4947231edb07" +set(WarpX_amrex_branch "294b6fee6f0c7f44693eac14e6b0c0702ecfd791" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)") diff --git a/setup.py b/setup.py index fc99b75f2f0..cdb8a6d844e 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,6 @@ def build_extension(self, ext): "-DWarpX_PRECISION=" + WARPX_PRECISION, "-DWarpX_PARTICLE_PRECISION=" + WARPX_PARTICLE_PRECISION, "-DWarpX_FFT:BOOL=" + WARPX_FFT, - "-DWarpX_HEFFTE:BOOL=" + WARPX_HEFFTE, "-DWarpX_PYTHON:BOOL=ON", "-DWarpX_PYTHON_IPO:BOOL=" + WARPX_PYTHON_IPO, "-DWarpX_QED:BOOL=" + WARPX_QED, @@ -208,7 +207,6 @@ def build_extension(self, ext): WARPX_PRECISION = env.pop("WARPX_PRECISION", "DOUBLE") WARPX_PARTICLE_PRECISION = env.pop("WARPX_PARTICLE_PRECISION", WARPX_PRECISION) WARPX_FFT = env.pop("WARPX_FFT", "OFF") -WARPX_HEFFTE = env.pop("WARPX_HEFFTE", "OFF") WARPX_QED = env.pop("WARPX_QED", "ON") WARPX_QED_TABLE_GEN = env.pop("WARPX_QED_TABLE_GEN", "OFF") WARPX_DIMS = env.pop("WARPX_DIMS", "1;2;RZ;3") From ec81a311c4f94c91fdfc3ce75dae33d52d44b57f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 14 Nov 2024 09:05:20 -0800 Subject: [PATCH 2/5] Update AMReX --- .github/workflows/cuda.yml | 2 +- cmake/dependencies/AMReX.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 
deletions(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index b8ff4804e96..9f0f16f44e9 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -126,7 +126,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 294b6fee6f0c7f44693eac14e6b0c0702ecfd791 && cd - + cd ../amrex && git checkout --detach 0165b6743355d52718d1a6fe03e24876a811a202 && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_FFT=TRUE USE_CCACHE=TRUE -j 4 ccache -s diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index d18d787b000..d41ae38cb9a 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -271,7 +271,7 @@ macro(find_amrex) endif() set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION}) - find_package(AMReX 294b6fee6f0c7f44693eac14e6b0c0702ecfd791 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_CATALYST} ${COMPONENT_DIMS} ${COMPONENT_EB} ${COMPONENT_FFT} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) + find_package(AMReX 0165b6743355d52718d1a6fe03e24876a811a202 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_CATALYST} ${COMPONENT_DIMS} ${COMPONENT_EB} ${COMPONENT_FFT} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) # note: TINYP skipped because user-configured and optional # AMReX CMake helper scripts @@ -294,7 +294,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "294b6fee6f0c7f44693eac14e6b0c0702ecfd791" +set(WarpX_amrex_branch "0165b6743355d52718d1a6fe03e24876a811a202" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)") From 92a649d7eb7e8f8021c19a342a66a966c2dab23f Mon Sep 17 00:00:00 2001 From: 
Remi Lehe Date: Thu, 14 Nov 2024 08:53:07 -0800 Subject: [PATCH 3/5] Update checksum --- .../test_3d_open_bc_poisson_solver.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Regression/Checksum/benchmarks_json/test_3d_open_bc_poisson_solver.json b/Regression/Checksum/benchmarks_json/test_3d_open_bc_poisson_solver.json index af9ab3a0bdd..80561aaa4e1 100644 --- a/Regression/Checksum/benchmarks_json/test_3d_open_bc_poisson_solver.json +++ b/Regression/Checksum/benchmarks_json/test_3d_open_bc_poisson_solver.json @@ -1,19 +1,19 @@ { "lev=0": { - "Bx": 100915933.446046, + "Bx": 100915933.44604117, "By": 157610622.18548763, - "Bz": 2.76973993530483e-13, - "Ex": 4.725065270619211e+16, - "Ey": 3.0253948989388292e+16, + "Bz": 9.614441087794229e-14, + "Ex": 4.725065270619209e+16, + "Ey": 3.025394898938681e+16, "Ez": 3276573.9514776673, "rho": 10994013582437.193 }, "electron": { - "particle_momentum_x": 5.701277606055763e-19, - "particle_momentum_y": 3.6504516636842883e-19, + "particle_momentum_x": 5.7012776060557455e-19, + "particle_momentum_y": 3.650451663685222e-19, "particle_momentum_z": 1.145432768297242e-10, "particle_position_x": 17.314086912497864, - "particle_position_y": 0.25836912671877965, + "particle_position_y": 0.25836912671877954, "particle_position_z": 10066.329600000008, "particle_weight": 19969036501.910976 } From f9b3699e2a0910bdd1c329f3761126681abf4e88 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 15 Nov 2024 09:26:01 -0800 Subject: [PATCH 4/5] Add ablastr.nprocs_igf_fft --- Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp b/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp index 6cfb8328bdd..b142978c8be 100644 --- a/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp +++ b/Source/ablastr/fields/IntegratedGreenFunctionSolver.cpp @@ -48,14 +48,11 @@ computePhiIGF ( 
amrex::MultiFab const & rho, // Do we grow the domain in the z-direction in the 2D mode? bool const do_2d_fft = false; - // Specify the number of processes for FFT. Can be any posistive number - // including 1. int nprocs = amrex::ParallelDescriptor::NProcs(); { - amrex::ParmParse const pp("ablastr"); - bool do_serial_fft = false; - pp.query("do_serial_fft", do_serial_fft); - if (do_serial_fft) { nprocs = 1; }; + amrex::ParmParse pp("ablastr"); + pp.queryAdd("nprocs_igf_fft", nprocs); + nprocs = std::max(1,std::min(nprocs, amrex::ParallelDescriptor::NProcs())); } static std::unique_ptr> obc_solver; From 99c92d2b7fcb5448e9fad3eddc7e66b7ef83c7b1 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sat, 16 Nov 2024 20:02:27 -0800 Subject: [PATCH 5/5] Update AMReX --- .github/workflows/cuda.yml | 2 +- cmake/dependencies/AMReX.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 9f0f16f44e9..8d40aba553c 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -126,7 +126,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" 
git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 0165b6743355d52718d1a6fe03e24876a811a202 && cd - + cd ../amrex && git checkout --detach 456c93c7d9512f1cdffac0574973d7df41417898 && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_FFT=TRUE USE_CCACHE=TRUE -j 4 ccache -s diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index d41ae38cb9a..491e333d712 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -271,7 +271,7 @@ macro(find_amrex) endif() set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION}) - find_package(AMReX 0165b6743355d52718d1a6fe03e24876a811a202 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_CATALYST} ${COMPONENT_DIMS} ${COMPONENT_EB} ${COMPONENT_FFT} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) + find_package(AMReX 456c93c7d9512f1cdffac0574973d7df41417898 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_CATALYST} ${COMPONENT_DIMS} ${COMPONENT_EB} ${COMPONENT_FFT} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) # note: TINYP skipped because user-configured and optional # AMReX CMake helper scripts @@ -294,7 +294,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "0165b6743355d52718d1a6fe03e24876a811a202" +set(WarpX_amrex_branch "456c93c7d9512f1cdffac0574973d7df41417898" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)")