diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem.sh index 26dcd2cc..7c564443 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem.sh @@ -4,10 +4,10 @@ module load PrgEnv-gnu # module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 +module load cudatoolkit # avoid bug in cray-libsci/21.08.1.2 # module load cray-libsci/22.11.1.2 -module load cray-libsci/23.12.5 +module load cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest ulimit -s unlimited #MPI settings: @@ -37,7 +37,7 @@ export SUPERLU_MPI_PROCESS_PER_GPU=$nmpipergpu # 2: this can better saturate GPU # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex.sh index 8e6fc638..a64d1b47 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex.sh @@ -2,22 +2,25 @@ # #modules: module load PrgEnv-gnu -# module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 -# avoid bug in cray-libsci/21.08.1.2 -# module load cray-libsci/22.11.1.2 -module load cray-libsci/23.12.5 +module load cudatoolkit +module load cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest ulimit -s unlimited #MPI settings: -export MPICH_GPU_SUPPORT_ENABLED=1 +# export MPICH_GPU_SUPPORT_ENABLED=1 export CRAY_ACCEL_TARGET=nvidia80 echo MPICH_GPU_SUPPORT_ENABLED=$MPICH_GPU_SUPPORT_ENABLED export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:$LD_LIBRARY_PATH -#SUPERLU settings: +# Problem setting: +dims=(4 4 4 8) +bs=64 + + + +#SUPERLU settings: export SUPERLU_LBS=GD export SUPERLU_ACC_OFFLOAD=1 # this can be 0 to do CPU tests on GPU nodes export GPU3DVERSION=0 @@ -26,9 +29,9 @@ export NEW3DSOLVE=1 export NEW3DSOLVETREECOMM=1 export SUPERLU_BIND_MPI_GPU=1 # assign GPU based on the MPI rank, assuming one MPI per GPU -# the supernode size doesn't need to specified when options.SolveOnly is used -export SUPERLU_MAXSUP=1 # max supernode size -export SUPERLU_RELAX=1 # upper bound for relaxed supernode size +# the supernode size has to be the same as -bs when options.SolveOnly is used +export SUPERLU_MAXSUP=${bs} # max supernode size +export SUPERLU_RELAX=${bs} # upper bound for relaxed supernode size export SUPERLU_MAX_BUFFER_SIZE=10000000 ## 500000000 # buffer size in words on GPU export SUPERLU_NUM_LOOKAHEADS=2 ##4, must be at least 2, see 'lookahead winSize' export SUPERLU_NUM_GPU_STREAMS=1 @@ -39,7 +42,7 @@ export SUPERLU_RANKORDER='XY' # Be careful: XY needs to be used when NOROWPERM # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} @@ -72,14 +75,15 @@ else exit $EXIT_HOST fi -nprows=(2) +nprows=(1) npcols=(1) -npz=(2) +npz=(4) nrhs=(1) NTH=1 NREP=1 # NODE_VAL_TOT=1 + for ((i = 0; i < ${#npcols[@]}; i++)); do NROW=${nprows[i]} NCOL=${npcols[i]} @@ -151,11 +155,24 @@ do # export SUPERLU_ACC_SOLVE=1 # srun -n $NCORE_VAL_TOT2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}_nmpipergpu${nmpipergpu} -SUPERLU_ACC_OFFLOAD=0 +# SUPERLU_ACC_OFFLOAD=0 +# export GPU3DVERSION=0 +# export SUPERLU_ACC_SOLVE=0 +# echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}" +# srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}_nmpipergpu${nmpipergpu} + + +SUPERLU_ACC_OFFLOAD=1 export GPU3DVERSION=0 -export SUPERLU_ACC_SOLVE=0 -echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}" -srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}_nmpipergpu${nmpipergpu} +export SUPERLU_ACC_SOLVE=1 +dim0=${dims[0]} +dim1=${dims[1]} +dim2=${dims[2]} +dim3=${dims[3]} +mkdir -p qcd +srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d_qcd -bs=${bs} -dim="${dim0} ${dim1} ${dim2} ${dim3}" -grid="${NROW} ${NCOL} ${NPZ}" | tee ./qcd/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}_nmpipergpu${nmpipergpu}_bs${bs}_dims${dim0}_${dim1}_${dim2}_${dim3} + + # SUPERLU_ACC_OFFLOAD=1 # export GPU3DVERSION=1 diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex_openblas.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex_openblas.sh index aa611e6f..bcf706d0 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex_openblas.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_complex_openblas.sh @@ -4,7 +4,7 @@ module load PrgEnv-gnu # module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 +module load cudatoolkit module unload cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest @@ -37,7 +37,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_openblas.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_openblas.sh index cbd27ecf..1d0f6845 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_openblas.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_gcc_nvshmem_openblas.sh @@ -4,7 +4,7 @@ module load PrgEnv-gnu # module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 +module load cudatoolkit module unload cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest @@ -40,7 +40,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem.sh index 3525da47..a302c117 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem.sh @@ -45,7 +45,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_complex.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_complex.sh index bf959033..1586e491 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_complex.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_complex.sh @@ -45,7 +45,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_single.sh b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_single.sh index a4d038ef..fd8eadb4 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_single.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_3dsolve_nvidia_nvshmem_single.sh @@ -45,7 +45,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_nvidia_nvshmem.sh b/example_scripts/batch_script_mpi_runit_perlmutter_nvidia_nvshmem.sh index 284c36dd..9575a5ea 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_nvidia_nvshmem.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_nvidia_nvshmem.sh @@ -34,7 +34,7 @@ export SUPERLU_ACC_SOLVE=1 # ##NVSHMEM settings: # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/batch_script_mpi_runit_perlmutter_python_gcc_nvshmem.sh b/example_scripts/batch_script_mpi_runit_perlmutter_python_gcc_nvshmem.sh index b95c7467..51a4ed4a 100755 --- a/example_scripts/batch_script_mpi_runit_perlmutter_python_gcc_nvshmem.sh +++ b/example_scripts/batch_script_mpi_runit_perlmutter_python_gcc_nvshmem.sh @@ -13,8 +13,8 @@ #modules: module load PrgEnv-gnu module load cmake -module load cudatoolkit/12.2 -module load cray-libsci/23.12.5 +module load cudatoolkit +module load cray-libsci module load python/3.11 ulimit -s unlimited #MPI settings: @@ -53,7 +53,7 @@ export SUPERLU_MPI_PROCESS_PER_GPU=$nmpipergpu # nmpipergpu>1 can better saturat ## The following is NVSHMEM settings for multi-GPU trisolve ################################################# # module load nvshmem/2.11.0 -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ export NVSHMEM_USE_GDRCOPY=1 export NVSHMEM_MPI_SUPPORT=1 export MPI_HOME=${MPICH_DIR} diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_gpu_openblas.sh b/example_scripts/run_cmake_build_perlmutter_gcc_gpu_openblas.sh index 36a3e751..606032ca 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_gpu_openblas.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_gpu_openblas.sh @@ -41,7 +41,7 @@ module unload cray-libsci export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.5\/compat:/} export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \ diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_nogpu.sh b/example_scripts/run_cmake_build_perlmutter_gcc_nogpu.sh index f7928375..5e29a894 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_nogpu.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_nogpu.sh @@ -4,7 +4,7 @@ module unload gpu #module load PrgEnv-gnu #module load gcc/11.2.0 -module load cmake/3.24.3 +module load cmake #module load cudatoolkit/11.7 parmetis_dir=/global/cfs/cdirs/m3894/tpl/install/parmetis/parmetis-4.0.3/n9-gcc11.2.0 @@ -23,16 +23,16 @@ cmake .. \ -DTPL_ENABLE_INTERNAL_BLASLIB=OFF \ -DTPL_ENABLE_LAPACKLIB=ON \ -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/default/GNU/9.1/x86_64/lib/libsci_gnu_82_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/default/GNU/9.1/x86_64/lib/libsci_gnu_82_mp.so \ + -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ + -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ -DBUILD_SHARED_LIBS=OFF \ - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=. make pddrive make pddrive3d make f_pddrive - +make pzdrive3d_qcd # -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \ diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem.sh b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem.sh index 2edb42ad..164de6c9 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem.sh @@ -30,22 +30,22 @@ module load PrgEnv-gnu # module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 +module load cudatoolkit # avoid bug in cray-libsci/21.08.1.2 # module load cray-libsci/22.11.1.2 -module load cray-libsci/23.12.5 +module load cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest # module load nvshmem/2.11.0 export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \ - -DCMAKE_CXX_FLAGS="-O2" \ + -DCMAKE_CXX_FLAGS="-O2 -std=c++11" \ -DCMAKE_Fortran_FLAGS="-O2" \ -DCMAKE_CXX_COMPILER=CC \ -DCMAKE_C_COMPILER=cc \ @@ -55,7 +55,7 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=ON \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ @@ -63,10 +63,10 @@ cmake .. \ -DTPL_ENABLE_MAGMALIB=ON \ -DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \ -DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \ - -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \ + -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \ -DTPL_ENABLE_COMBBLASLIB=OFF \ -DTPL_ENABLE_NVSHMEM=ON \ -DTPL_NVSHMEM_LIBRARIES="-L${CUDA_HOME}/lib64/stubs/ -lnvidia-ml -L/usr/lib64 -lgdrapi -lstdc++ -L/opt/cray/libfabric/1.20.1/lib64 -lfabric -L${NVSHMEM_HOME}/lib -lnvshmem" \ @@ -79,6 +79,7 @@ make pddrive -j16 make pddrive3d -j16 make pzdrive3d -j16 make f_pddrive +make pzdrive3d_qcd ## -DTPL_BLAS_LIBRARIES=/global/cfs/cdirs/m3894/ptlin/tpl/amd_blis/install/amd_blis-20211021-n9-gcc9.3.0/lib/libblis.a \ diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_longint.sh b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_longint.sh index bb25df84..08870d66 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_longint.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_longint.sh @@ -40,14 +40,14 @@ module load cray-libsci export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \ - -DCMAKE_CXX_FLAGS="-O2" \ + -DCMAKE_CXX_FLAGS="-O2 -std=c++11" \ -DCMAKE_Fortran_FLAGS="-O2" \ -DCMAKE_CXX_COMPILER=CC \ -DCMAKE_C_COMPILER=cc \ @@ -57,7 +57,7 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=ON \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ @@ -65,10 +65,10 @@ cmake .. \ -DTPL_ENABLE_MAGMALIB=ON \ -DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \ -DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/include;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/metis/include" \ - -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libmetis/libmetis.so" \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/include;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/metis/include" \ + -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libmetis/libmetis.so" \ -DTPL_ENABLE_COMBBLASLIB=OFF \ -DTPL_ENABLE_NVSHMEM=ON \ -DTPL_NVSHMEM_LIBRARIES="-L${CUDA_HOME}/lib64/stubs/ -lnvidia-ml -L/usr/lib64 -lgdrapi -lstdc++ -L/opt/cray/libfabric/1.20.1/lib64 -lfabric -L${NVSHMEM_HOME}/lib -lnvshmem" \ @@ -84,5 +84,6 @@ cmake .. \ make pddrive -j16 make pddrive3d -j16 make f_pddrive +make pzdrive3d_qcd ## -DTPL_BLAS_LIBRARIES=/global/cfs/cdirs/m3894/ptlin/tpl/amd_blis/install/amd_blis-20211021-n9-gcc9.3.0/lib/libblis.a \ diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_openblas.sh b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_openblas.sh index 37724124..3a4b8426 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_openblas.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_openblas.sh @@ -39,10 +39,10 @@ module unload cray-libsci # module load nvshmem/2.11.0 # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \ @@ -56,7 +56,7 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=ON \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python.sh b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python.sh index d03769cc..57e8644f 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python.sh @@ -30,23 +30,23 @@ module load PrgEnv-gnu # module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 +module load cudatoolkit # avoid bug in cray-libsci/21.08.1.2 # module load cray-libsci/22.11.1.2 -module load cray-libsci/23.12.5 +module load cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest # module load nvshmem/2.11.0 module load python/3.11 export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} # export PREFIX_PATH=~/.local/perlmutter/python-3.11/ -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \ @@ -60,7 +60,7 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=ON \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ @@ -68,10 +68,10 @@ cmake .. \ -DTPL_ENABLE_MAGMALIB=OFF \ -DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \ -DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \ - -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \ + -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \ -DTPL_ENABLE_COMBBLASLIB=OFF \ -DTPL_ENABLE_NVSHMEM=OFF \ -DTPL_NVSHMEM_LIBRARIES="-L${CUDA_HOME}/lib64/stubs/ -lnvidia-ml -L/usr/lib64 -lgdrapi -lstdc++ -L/opt/cray/libfabric/1.20.1/lib64 -lfabric -L${NVSHMEM_HOME}/lib -lnvshmem" \ diff --git a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python_longint.sh b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python_longint.sh index 660fd8cf..2f825b7d 100755 --- a/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python_longint.sh +++ b/example_scripts/run_cmake_build_perlmutter_gcc_nvshmem_python_longint.sh @@ -30,10 +30,10 @@ module load PrgEnv-gnu # module load gcc/11.2.0 module load cmake -module load cudatoolkit/12.2 +module load cudatoolkit # avoid bug in cray-libsci/21.08.1.2 # module load cray-libsci/22.11.1.2 -module load cray-libsci/23.12.5 +module load cray-libsci # module use /global/common/software/nersc/pe/modulefiles/latest # module load nvshmem/2.11.0 module load python/3.11 @@ -41,13 +41,13 @@ module load python/3.11 export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} # export PREFIX_PATH=~/.local/perlmutter/python-3.11/ -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \ @@ -61,7 +61,7 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=ON \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ @@ -69,10 +69,10 @@ cmake .. \ -DTPL_ENABLE_MAGMALIB=OFF \ -DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \ -DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \ - -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/include;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/metis/include" \ - -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libmetis/libmetis.so" \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \ + -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/include;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/metis/include" \ + -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libmetis/libmetis.so" \ -DTPL_ENABLE_COMBBLASLIB=OFF \ -DTPL_ENABLE_NVSHMEM=OFF \ -DTPL_NVSHMEM_LIBRARIES="-L${CUDA_HOME}/lib64/stubs/ -lnvidia-ml -L/usr/lib64 -lgdrapi -lstdc++ -L/opt/cray/libfabric/1.20.1/lib64 -lfabric -L${NVSHMEM_HOME}/lib -lnvshmem" \ diff --git a/example_scripts/run_cmake_build_perlmutter_nvhpc_gpu.sh b/example_scripts/run_cmake_build_perlmutter_nvhpc_gpu.sh index d017b91a..66da3c24 100644 --- a/example_scripts/run_cmake_build_perlmutter_nvhpc_gpu.sh +++ b/example_scripts/run_cmake_build_perlmutter_nvhpc_gpu.sh @@ -14,13 +14,13 @@ cmake .. \ -DCMAKE_C_COMPILER=cc \ -DCMAKE_CXX_COMPILER=CC \ -DXSDK_ENABLE_Fortran=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DTPL_ENABLE_CUDALIB=TRUE \ -DTPL_CUDA_LIBRARIES="/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/12.2/targets/x86_64-linux/lib/libcudart.so" \ -DTPL_ENABLE_INTERNAL_BLASLIB=OFF \ -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/NVIDIA/23.3/x86_64/lib/libsci_nvidia_mp.a \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_nvidia_mp.a \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=. \ diff --git a/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh b/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh index 69abfc8f..3751cad7 100644 --- a/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh +++ b/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem.sh @@ -34,10 +34,10 @@ module load cray-libsci # module load nvshmem/2.11.0 export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS=" -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_ -I${NVSHMEM_HOME}/include" \ @@ -49,7 +49,7 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=OFF \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ @@ -57,8 +57,8 @@ cmake .. \ -DTPL_ENABLE_MAGMALIB=ON \ -DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \ -DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/NVIDIA/23.3/x86_64/lib/libsci_nvidia_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/NVIDIA/23.3/x86_64/lib/libsci_nvidia_mp.so \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_nvidia_mp.so \ + -DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_nvidia_mp.so \ -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m3894/lib/PrgEnv-nvidia/parmetis-4.0.3/include;/global/cfs/cdirs/m3894/lib/PrgEnv-nvidia/parmetis-4.0.3/metis/include" \ -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m3894/lib/PrgEnv-nvidia/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m3894/lib/PrgEnv-nvidia/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \ -DTPL_ENABLE_COMBBLASLIB=OFF \ diff --git a/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem_longint.sh b/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem_longint.sh index 3b9cd944..3c3ffe11 100644 --- a/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem_longint.sh +++ b/example_scripts/run_cmake_build_perlmutter_nvidia_nvshmem_longint.sh @@ -34,10 +34,10 @@ module load cray-libsci # module load nvshmem/2.11.0 # avoid bug in cudatoolkit -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/} +# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/} # export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/} -NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ +NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/ #NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/ cmake .. \ -DCMAKE_C_FLAGS=" -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_ -I${NVSHMEM_HOME}/include" \ @@ -49,13 +49,13 @@ cmake .. \ -DTPL_ENABLE_LAPACKLIB=ON \ -DBUILD_SHARED_LIBS=OFF \ -DTPL_ENABLE_CUDALIB=ON \ - -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \ + -DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \ -DCMAKE_CUDA_ARCHITECTURES=80 \ -DCMAKE_INSTALL_PREFIX=. \ -DCMAKE_INSTALL_LIBDIR=./lib \ -DCMAKE_BUILD_TYPE=Debug \ - -DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/NVIDIA/23.3/x86_64/lib/libsci_nvidia_mp.so \ - -DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/NVIDIA/23.3/x86_64/lib/libsci_nvidia_mp.so \ + -DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_nvidia_mp.so \ + -DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_nvidia_mp.so \ -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-nvidia-longint/include;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-nvidia-longint/metis/include" \ -DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-nvidia-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-nvidia-longint/build/Linux-x86_64/libmetis/libmetis.so" \ -DTPL_ENABLE_COMBBLASLIB=OFF \