Skip to content

Commit

Permalink
update the Perlmutter build and runscripts
Browse files Browse the repository at this point in the history
  • Loading branch information
liuyangzhuan committed Feb 24, 2025
1 parent 26ee4df commit 1a7d46b
Show file tree
Hide file tree
Showing 19 changed files with 107 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
module load PrgEnv-gnu
# module load gcc/11.2.0
module load cmake
module load cudatoolkit/12.2
module load cudatoolkit
# avoid bug in cray-libsci/21.08.1.2
# module load cray-libsci/22.11.1.2
module load cray-libsci/23.12.5
module load cray-libsci
# module use /global/common/software/nersc/pe/modulefiles/latest
ulimit -s unlimited
#MPI settings:
Expand Down Expand Up @@ -37,7 +37,7 @@ export SUPERLU_MPI_PROCESS_PER_GPU=$nmpipergpu # 2: this can better saturate GPU

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,25 @@
#
#modules:
module load PrgEnv-gnu
# module load gcc/11.2.0
module load cmake
module load cudatoolkit/12.2
# avoid bug in cray-libsci/21.08.1.2
# module load cray-libsci/22.11.1.2
module load cray-libsci/23.12.5
module load cudatoolkit
module load cray-libsci
# module use /global/common/software/nersc/pe/modulefiles/latest
ulimit -s unlimited
#MPI settings:
export MPICH_GPU_SUPPORT_ENABLED=1
# export MPICH_GPU_SUPPORT_ENABLED=1
export CRAY_ACCEL_TARGET=nvidia80
echo MPICH_GPU_SUPPORT_ENABLED=$MPICH_GPU_SUPPORT_ENABLED
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:$LD_LIBRARY_PATH
#SUPERLU settings:


# Problem setting:
dims=(4 4 4 8)
bs=64



#SUPERLU settings:
export SUPERLU_LBS=GD
export SUPERLU_ACC_OFFLOAD=1 # this can be 0 to do CPU tests on GPU nodes
export GPU3DVERSION=0
Expand All @@ -26,9 +29,9 @@ export NEW3DSOLVE=1
export NEW3DSOLVETREECOMM=1
export SUPERLU_BIND_MPI_GPU=1 # assign GPU based on the MPI rank, assuming one MPI per GPU

# the supernode size doesn't need to be specified when options.SolveOnly is used
export SUPERLU_MAXSUP=1 # max supernode size
export SUPERLU_RELAX=1 # upper bound for relaxed supernode size
# the supernode size has to be the same as -bs when options.SolveOnly is used
export SUPERLU_MAXSUP=${bs} # max supernode size
export SUPERLU_RELAX=${bs} # upper bound for relaxed supernode size
export SUPERLU_MAX_BUFFER_SIZE=10000000 ## 500000000 # buffer size in words on GPU
export SUPERLU_NUM_LOOKAHEADS=2 ##4, must be at least 2, see 'lookahead winSize'
export SUPERLU_NUM_GPU_STREAMS=1
Expand All @@ -39,7 +42,7 @@ export SUPERLU_RANKORDER='XY' # Be careful: XY needs to be used when NOROWPERM

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down Expand Up @@ -72,14 +75,15 @@ else
exit $EXIT_HOST
fi

nprows=(2)
nprows=(1)
npcols=(1)
npz=(2)
npz=(4)
nrhs=(1)
NTH=1
NREP=1
# NODE_VAL_TOT=1


for ((i = 0; i < ${#npcols[@]}; i++)); do
NROW=${nprows[i]}
NCOL=${npcols[i]}
Expand Down Expand Up @@ -151,11 +155,24 @@ do
# export SUPERLU_ACC_SOLVE=1
# srun -n $NCORE_VAL_TOT2D -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive -c $NCOL -r $NROW -b $batch $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}_${NTH}_1rhs_2d_gpu_${SUPERLU_ACC_OFFLOAD}_nmpipergpu${nmpipergpu}

SUPERLU_ACC_OFFLOAD=0
# SUPERLU_ACC_OFFLOAD=0
# export GPU3DVERSION=0
# export SUPERLU_ACC_SOLVE=0
# echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}"
# srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}_nmpipergpu${nmpipergpu}


SUPERLU_ACC_OFFLOAD=1
export GPU3DVERSION=0
export SUPERLU_ACC_SOLVE=0
echo "srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}"
srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d -c $NCOL -r $NROW -d $NPZ -b $batch -i 0 -s $NRHS $CFS/m2957/liuyangz/my_research/matrix/$MAT | tee ./$MAT/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}_nmpipergpu${nmpipergpu}
export SUPERLU_ACC_SOLVE=1
dim0=${dims[0]}
dim1=${dims[1]}
dim2=${dims[2]}
dim3=${dims[3]}
mkdir -p qcd
srun -n $NCORE_VAL_TOT -c $TH_PER_RANK --cpu_bind=cores ./EXAMPLE/pzdrive3d_qcd -bs=${bs} -dim="${dim0} ${dim1} ${dim2} ${dim3}" -grid="${NROW} ${NCOL} ${NPZ}" | tee ./qcd/SLU.o_mpi_${NROW}x${NCOL}x${NPZ}_${OMP_NUM_THREADS}_3d_newest_gpusolve_${SUPERLU_ACC_SOLVE}_nrhs_${NRHS}_gpu_${SUPERLU_ACC_OFFLOAD}_cpp_${GPU3DVERSION}_nmpipergpu${nmpipergpu}_bs${bs}_dims${dim0}_${dim1}_${dim2}_${dim3}



# SUPERLU_ACC_OFFLOAD=1
# export GPU3DVERSION=1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
module load PrgEnv-gnu
# module load gcc/11.2.0
module load cmake
module load cudatoolkit/12.2
module load cudatoolkit
module unload cray-libsci
# module use /global/common/software/nersc/pe/modulefiles/latest

Expand Down Expand Up @@ -37,7 +37,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
module load PrgEnv-gnu
# module load gcc/11.2.0
module load cmake
module load cudatoolkit/12.2
module load cudatoolkit
module unload cray-libsci
# module use /global/common/software/nersc/pe/modulefiles/latest

Expand Down Expand Up @@ -40,7 +40,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export SUPERLU_N_GEMM=6000 # FLOPS threshold divide workload between CPU and GPU

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export SUPERLU_ACC_SOLVE=1

# ##NVSHMEM settings:
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#modules:
module load PrgEnv-gnu
module load cmake
module load cudatoolkit/12.2
module load cray-libsci/23.12.5
module load cudatoolkit
module load cray-libsci
module load python/3.11
ulimit -s unlimited
#MPI settings:
Expand Down Expand Up @@ -53,7 +53,7 @@ export SUPERLU_MPI_PROCESS_PER_GPU=$nmpipergpu # nmpipergpu>1 can better saturat
## The following are the NVSHMEM settings for multi-GPU trisolve
#################################################
# module load nvshmem/2.11.0
NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
export NVSHMEM_USE_GDRCOPY=1
export NVSHMEM_MPI_SUPPORT=1
export MPI_HOME=${MPICH_DIR}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ module unload cray-libsci
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.5\/compat:/}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/}

NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
#NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/
cmake .. \
-DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \
Expand Down
10 changes: 5 additions & 5 deletions example_scripts/run_cmake_build_perlmutter_gcc_nogpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
module unload gpu
#module load PrgEnv-gnu
#module load gcc/11.2.0
module load cmake/3.24.3
module load cmake
#module load cudatoolkit/11.7

parmetis_dir=/global/cfs/cdirs/m3894/tpl/install/parmetis/parmetis-4.0.3/n9-gcc11.2.0
Expand All @@ -23,16 +23,16 @@ cmake .. \
-DTPL_ENABLE_INTERNAL_BLASLIB=OFF \
-DTPL_ENABLE_LAPACKLIB=ON \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/default/GNU/9.1/x86_64/lib/libsci_gnu_82_mp.so \
-DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/default/GNU/9.1/x86_64/lib/libsci_gnu_82_mp.so \
-DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \
-DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=.

make pddrive
make pddrive3d
make f_pddrive

make pzdrive3d_qcd


# -DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \
Expand Down
21 changes: 11 additions & 10 deletions example_scripts/run_cmake_build_perlmutter_gcc_nvshmem.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@
module load PrgEnv-gnu
# module load gcc/11.2.0
module load cmake
module load cudatoolkit/12.2
module load cudatoolkit
# avoid bug in cray-libsci/21.08.1.2
# module load cray-libsci/22.11.1.2
module load cray-libsci/23.12.5
module load cray-libsci
# module use /global/common/software/nersc/pe/modulefiles/latest
# module load nvshmem/2.11.0
export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv
# avoid bug in cudatoolkit
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/}
# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/}
# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/}

NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
#NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/
cmake .. \
-DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \
-DCMAKE_CXX_FLAGS="-O2" \
-DCMAKE_CXX_FLAGS="-O2 -std=c++11" \
-DCMAKE_Fortran_FLAGS="-O2" \
-DCMAKE_CXX_COMPILER=CC \
-DCMAKE_C_COMPILER=cc \
Expand All @@ -55,18 +55,18 @@ cmake .. \
-DTPL_ENABLE_LAPACKLIB=ON \
-DBUILD_SHARED_LIBS=ON \
-DTPL_ENABLE_CUDALIB=ON \
-DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \
-DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \
-DCMAKE_CUDA_ARCHITECTURES=80 \
-DCMAKE_INSTALL_PREFIX=. \
-DCMAKE_INSTALL_LIBDIR=./lib \
-DCMAKE_BUILD_TYPE=Debug \
-DTPL_ENABLE_MAGMALIB=ON \
-DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \
-DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \
-DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \
-DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \
-DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \
-DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \
-DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \
-DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \
-DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/include;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/metis/include" \
-DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3/build/Linux-x86_64/libmetis/libmetis.so" \
-DTPL_ENABLE_COMBBLASLIB=OFF \
-DTPL_ENABLE_NVSHMEM=ON \
-DTPL_NVSHMEM_LIBRARIES="-L${CUDA_HOME}/lib64/stubs/ -lnvidia-ml -L/usr/lib64 -lgdrapi -lstdc++ -L/opt/cray/libfabric/1.20.1/lib64 -lfabric -L${NVSHMEM_HOME}/lib -lnvshmem" \
Expand All @@ -79,6 +79,7 @@ make pddrive -j16
make pddrive3d -j16
make pzdrive3d -j16
make f_pddrive
make pzdrive3d_qcd

## -DTPL_BLAS_LIBRARIES=/global/cfs/cdirs/m3894/ptlin/tpl/amd_blis/install/amd_blis-20211021-n9-gcc9.3.0/lib/libblis.a \

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ module load cray-libsci
export MAGMA_ROOT=/global/cfs/cdirs/m2957/lib/magma_nopiv

# avoid bug in cudatoolkit
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/}
# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/}
# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/}

NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
#NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/
cmake .. \
-DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \
-DCMAKE_CXX_FLAGS="-O2" \
-DCMAKE_CXX_FLAGS="-O2 -std=c++11" \
-DCMAKE_Fortran_FLAGS="-O2" \
-DCMAKE_CXX_COMPILER=CC \
-DCMAKE_C_COMPILER=cc \
Expand All @@ -57,18 +57,18 @@ cmake .. \
-DTPL_ENABLE_LAPACKLIB=ON \
-DBUILD_SHARED_LIBS=ON \
-DTPL_ENABLE_CUDALIB=ON \
-DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \
-DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \
-DCMAKE_CUDA_ARCHITECTURES=80 \
-DCMAKE_INSTALL_PREFIX=. \
-DCMAKE_INSTALL_LIBDIR=./lib \
-DCMAKE_BUILD_TYPE=Debug \
-DTPL_ENABLE_MAGMALIB=ON \
-DTPL_MAGMA_INCLUDE_DIRS="${MAGMA_ROOT}/include" \
-DTPL_MAGMA_LIBRARIES="${MAGMA_ROOT}/lib/libmagma.so" \
-DTPL_BLAS_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \
-DTPL_LAPACK_LIBRARIES=/opt/cray/pe/libsci/23.12.5/GNU/12.3/x86_64/lib/libsci_gnu_123_mp.so \
-DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/include;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/metis/include" \
-DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/liuyangz/my_software/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libmetis/libmetis.so" \
-DTPL_BLAS_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \
-DTPL_LAPACK_LIBRARIES=$CRAY_LIBSCI_PREFIX/lib/libsci_gnu_mp.so \
-DTPL_PARMETIS_INCLUDE_DIRS="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/include;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/metis/include" \
-DTPL_PARMETIS_LIBRARIES="/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libparmetis/libparmetis.so;/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/parmetis-4.0.3-gnu-longint/build/Linux-x86_64/libmetis/libmetis.so" \
-DTPL_ENABLE_COMBBLASLIB=OFF \
-DTPL_ENABLE_NVSHMEM=ON \
-DTPL_NVSHMEM_LIBRARIES="-L${CUDA_HOME}/lib64/stubs/ -lnvidia-ml -L/usr/lib64 -lgdrapi -lstdc++ -L/opt/cray/libfabric/1.20.1/lib64 -lfabric -L${NVSHMEM_HOME}/lib -lnvshmem" \
Expand All @@ -84,5 +84,6 @@ cmake .. \
make pddrive -j16
make pddrive3d -j16
make f_pddrive
make pzdrive3d_qcd

## -DTPL_BLAS_LIBRARIES=/global/cfs/cdirs/m3894/ptlin/tpl/amd_blis/install/amd_blis-20211021-n9-gcc9.3.0/lib/libblis.a \
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ module unload cray-libsci
# module load nvshmem/2.11.0

# avoid bug in cudatoolkit
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.2\/compat:/}
# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-12.4\/compat:/}
# export LD_LIBRARY_PATH=${LD_LIBRARY_PATH//\/usr\/local\/cuda-11.7\/compat:/}

NVSHMEM_HOME=/global/cfs/cdirs/m3894/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
NVSHMEM_HOME=/global/cfs/cdirs/m2957/lib/lib/PrgEnv-gnu/nvshmem_src_2.8.0-3/build/
#NVSHMEM_HOME=${CRAY_NVIDIA_PREFIX}/comm_libs/nvshmem/
cmake .. \
-DCMAKE_C_FLAGS="-O2 -std=c11 -DPRNTlevel=0 -DPROFlevel=0 -DDEBUGlevel=0 -DAdd_" \
Expand All @@ -56,7 +56,7 @@ cmake .. \
-DTPL_ENABLE_LAPACKLIB=ON \
-DBUILD_SHARED_LIBS=ON \
-DTPL_ENABLE_CUDALIB=ON \
-DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=/opt/cray/pe/craype/2.7.30/bin/CC" \
-DCMAKE_CUDA_FLAGS="-I${NVSHMEM_HOME}/include -I${MPICH_DIR}/include -ccbin=CC" \
-DCMAKE_CUDA_ARCHITECTURES=80 \
-DCMAKE_INSTALL_PREFIX=. \
-DCMAKE_INSTALL_LIBDIR=./lib \
Expand Down
Loading

0 comments on commit 1a7d46b

Please sign in to comment.