Skip to content

Commit

Permalink
add summit nvshmem compile and run scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
nanding0701 committed Dec 15, 2022
1 parent 7543a28 commit d438914
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 19 deletions.
101 changes: 101 additions & 0 deletions example_scripts/batch_script_mpi_runit_summit_nvshmem_gpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/bin/bash
#BSUB -W 01:00
#BSUB -nnodes 2
#BSUB -alloc_flags nvsolve
#BSUB -J superlu_gpu
#
# LSF batch script: runs the pddrive example with NVSHMEM over a sweep of
# process grids (nprows x npcols), OpenMP thread counts and input matrices,
# one MPI rank per GPU. Each run's combined stdout/stderr is copied to
#   ./<matrix>_summit/SLU.o_mpi_<R>x<C>_OMP_<threads>_GPU_<ranks>_<timestamp>
#
# -nnodes is 2 because the largest grid below (1x12) is launched as two
# resource sets of 6 GPUs each; keep it >= ceil(max ranks / GPUS_PER_NODE).

readonly EXIT_SUCCESS=0
readonly EXIT_HOST=1   # launcher (jsrun) not available on this host
readonly EXIT_PARAM=2  # bad sweep parameters or missing executable

# Print a message to stderr and terminate with the given status.
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

module load essl
module load cmake/

command -v jsrun >/dev/null || die "$EXIT_HOST" "jsrun not found in PATH"

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
CUR_DIR=$(pwd)
FILE_DIR=$CUR_DIR/EXAMPLE
INPUT_DIR=/ccs/home/nanding/myproject/superLU/matrix
FILE_NAME=pddrive
FILE=$FILE_DIR/$FILE_NAME
CPDIR=/ccs/home/nanding/myproject/superLU/nvshmem_new_U/run_nvshmem270_cuda1103_20221212/EXAMPLE

# Stage the executable from the build tree. A failed copy is only a warning
# (a previously staged binary may still be usable); a missing binary is fatal.
mkdir -p -- "$FILE_DIR"
cp -rfv -- "$CPDIR/$FILE_NAME" "$FILE_DIR/" \
  || printf 'warning: could not copy %s from %s\n' "$FILE_NAME" "$CPDIR" >&2
[[ -x "$FILE" ]] || die "$EXIT_PARAM" "executable not found: $FILE"

# ---------------------------------------------------------------------------
# NVSHMEM / MPI environment
# ---------------------------------------------------------------------------
export NVSHMEM_MPI_LIB_NAME=libmpi_ibm.so
export NVSHMEM_LMPI=-lmpi_ibm
export NVSHMEM_HOME=/ccs/home/nanding/mysoftware/nvshmem270_gdr23_cuda1102_11232022
export LD_LIBRARY_PATH=$NVSHMEM_HOME/lib:$LD_LIBRARY_PATH
#export NVSHMEM_BOOTSTRAP_TWO_STAGE=1
#export NVSHMEM_BOOTSTRAP=MPI

# Settings exported to every pddrive run (identical for all cases).
export NSUP=256
export NREL=256
export MAX_BUFFER_SIZE=5000000000

# ---------------------------------------------------------------------------
# Sweep parameters
# ---------------------------------------------------------------------------
# Process grids: entry i is run as nprows[i] x npcols[i].
nprows=(1 1 1 1 1)
npcols=(1 2 3 6 12)
# OpenMP thread counts to try for every grid.
omp_threads=(7)
# Other matrices used previously: DG_GrapheneDisorder_8192.bin
# LU_C_BN_C_2by2.bin s1_mat_0_507744.bin
matrix=(s1_mat_0_253872.bin Li4244.bin)

# The launch logic below places at most this many ranks (one GPU each) in a
# single resource set, and gives every rank CPUS_PER_RANK cores.
readonly GPUS_PER_NODE=6
readonly CPUS_PER_RANK=2  # each nvshmem rank needs 2 CPU threads

(( ${#nprows[@]} == ${#npcols[@]} )) \
  || die "$EXIT_PARAM" "nprows and npcols must have the same number of entries"

MYDATE=$(date '+%Y-%m-%d-%H-%M-%S')

for ((i = 0; i < ${#npcols[@]}; i++)); do
  NROW=${nprows[i]}
  NCOL=${npcols[i]}
  total_ranks=$((NROW * NCOL))

  # Pick the jsrun layout for this grid.
  if (( total_ranks <= GPUS_PER_NODE )); then
    # Everything fits in one resource set on one host. This also covers the
    # single-rank case, so 1x1 runs pddrive like every other grid.
    launch=(
      -n1
      "-a${total_ranks}"
      "-c$((CPUS_PER_RANK * total_ranks))"
      "-g${total_ranks}"
      -r1
    )
  elif (( total_ranks % GPUS_PER_NODE == 0 )); then
    # Several full resource sets of GPUS_PER_NODE ranks each.
    launch=(
      "-n$((total_ranks / GPUS_PER_NODE))"
      "-a${GPUS_PER_NODE}"
      "-c$((CPUS_PER_RANK * GPUS_PER_NODE))"
      "-g${GPUS_PER_NODE}"
    )
  else
    # Integer division would launch fewer ranks than the grid requires.
    printf 'skipping %sx%s: %s ranks is not a multiple of %s\n' \
      "$NROW" "$NCOL" "$total_ranks" "$GPUS_PER_NODE" >&2
    continue
  fi

  for NTH in "${omp_threads[@]}"; do
    export OMP_NUM_THREADS=$NTH

    for MAT in "${matrix[@]}"; do
      out_dir=./${MAT}_summit
      log_file=${out_dir}/SLU.o_mpi_${NROW}x${NCOL}_OMP_${OMP_NUM_THREADS}_GPU_${total_ranks}_${MYDATE}

      mkdir -p -- "$out_dir"
      echo "matrix: ${MAT}, ${total_ranks} GPUs"
      jsrun "${launch[@]}" "$FILE" -c "$NCOL" -r "$NROW" "$INPUT_DIR/$MAT" 2>&1 \
        | tee -- "$log_file"
    done # matrix
  done # OpenMP threads
done # process grid

exit "$EXIT_SUCCESS"

24 changes: 5 additions & 19 deletions example_scripts/run_cmake_build_summit_nvshmem_gpu.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
#!/bin/bash
#module load netlib-lapack/3.8.0
#module load gcc/6.4.0
#module swap xl gcc
module load xl
module load cmake
module load cuda
Expand All @@ -21,18 +18,12 @@ export MPI_HOME=$OLCF_SPECTRUM_MPI_ROOT
export SHMEM_HOME=$MPI_HOME
export NVSHMEM_MPI_LIB_NAME=libmpi_ibm.so
export NVSHMEM_LMPI=-lmpi_ibm
#export NVSHMEM_HOME=/ccs/home/nanding/mysoftware/nvshmem113_gdr_debug
#export NVSHMEM_HOME=/ccs/home/nanding/mysoftware/nvshmem113_gdr
export NVSHMEM_HOME=/ccs/home/nanding/mysoftware/nvshmem203_gdr_cuda1103/
export NVSHMEM_HOME=/ccs/home/nanding/mysoftware/nvshmem270_gdr23_cuda1102_11232022
#export NVSHMEM_HOME=/ccs/home/nanding/mysoftware/nvshmem203_gdr_cuda1103/
export CUDA_INC=$CUDA_INC:$NVSHMEM_HOME/include

export LD_LIBRARY_PATH=$NVSHMEM_HOME/lib:$LD_LIBRARY_PATH

#CXX=g++
CXX=mpiCC
##-qsmp=omp
#-Xcompiler -fPIC -shared -rdc=true
#-DCUDA_SEPARABLE_COMPILATION
cmake .. \
-DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS_ROOT}/include;${OLCF_CUDA_ROOT}/include" \
-DTPL_PARMETIS_LIBRARIES="${PARMETIS_ROOT}/lib/libparmetis.so;${PARMETIS_ROOT}/lib/libmetis.so" \
Expand All @@ -47,15 +38,10 @@ cmake .. \
-DTPL_ENABLE_CUDALIB=ON \
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-DCMAKE_CXX_FLAGS="-qsmp=omp -Ofast -DRELEASE ${INC_VTUNE}" \
-DCMAKE_C_FLAGS="-qsmp=omp -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -DGPU_ACC -D_USE_NVSHMEM -fopenmp -I${NVSHMEM_HOME}/include/ " \
-DCMAKE_CUDA_FLAGS="-ccbin ${CXX} -gencode arch=compute_70,code=sm_70 -I${NVSHMEM_HOME}/include/ -DENABLE_MPI_SUPPORT -DPRNTlevel=1 -std=c++11 -DPROFlevel=0 -DEBUGlevel=0 -DGPU_ACC --disable-warnings" \
-DCMAKE_EXE_LINKER_FLAGS="-lcuda -lcudadevrt -L${OLCF_CUDA_ROOT}/lib64 -lcudart_static -libverbs -L${NVSHMEM_HOME}/lib/ -lnvshmem -L${OLCF_SPECTRUM_MPI_ROOT}/lib -lmpi_ibm"
-DCMAKE_C_FLAGS="-qsmp=omp -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -DGPU_ACC -DSLU_HAVE_LAPACK -DGPU_SOLVE -D_USE_NVSHMEM -fopenmp -I${NVSHMEM_HOME}/include/ " \
-DCMAKE_CUDA_FLAGS="-ccbin ${CXX} -gencode arch=compute_70,code=sm_70 -I${NVSHMEM_HOME}/include/ -DENABLE_MPI_SUPPORT -DPRNTlevel=1 -std=c++11 -DPROFlevel=0 -DEBUGlevel=0 -DGPU_ACC -DSLU_HAVE_LAPACK -DGPU_SOLVE --disable-warnings" \
-DCMAKE_EXE_LINKER_FLAGS="-lcuda -lcudadevrt -L${OLCF_CUDA_ROOT}/lib64 -lcudart_static -L${OLCF_CUDA_ROOT}/lib64/stubs/ -lnvidia-ml -libverbs -L${NVSHMEM_HOME}/lib/ -lnvshmem -L${OLCF_SPECTRUM_MPI_ROOT}/lib -lmpi_ibm"
make pddrive
# -DCMAKE_CUDA_FLAGS="-ccbin ${CXX} -gencode=arch=compute_70,code=sm_70 -G -Xcompiler -rdynamic -I$(OMPI_DIR)/include -I${NVSHMEM_HOME}/include/ -DPRNTlevel=1 -std=c++11 -DPROFlevel=0 -DEBUGlevel=0 -DGPU_ACC --disable-warnings" \
# -DCMAKE_EXE_LINKER_FLAGS="-lcuda -L${CUDA_HOME}/lib64 -lcudart -libverbs -L${MPI_HOME}/lib ${MPI_LIBS} -L${NVSHMEM_HOME}/lib/ -lnvshmem -L${CUDA_HOME}/targets/ppc64le-linux/lib/stubs -lnvidia-ml"
# -DCMAKE_CUDA_COMPILER=nvcc \
#-L${OLCF_CUDA_ROOT}/lib64 -lcudadevrt
# -DCMAKE_CUDA_FLAGS="-DPRNTlevel=1 -std=c++11 -DPROFlevel=0 -DDEBUGlevel=0 -DGPU_ACC -gencode arch=compute_70,code=sm_70 -I${NVSHMEM_HOME}/include -I${OLCF_SPECTRUM_MPI_ROOT}/include -D_NVSHMEM_SHMEM_SUPPORT -D_NVSHMEM_MPI_SUPPORT -L${NVSHMEM_HOME}/lib/ -lnvshmem -L${OLCF_SPECTRUM_MPI_ROOT}/lib -lmpi_ibm -loshmem"



Expand Down

0 comments on commit d438914

Please sign in to comment.