diff --git a/CMakeLists.txt.orig b/CMakeLists.txt.orig index 72ab72b2..b67bb6ca 100644 --- a/CMakeLists.txt.orig +++ b/CMakeLists.txt.orig @@ -178,25 +178,6 @@ if (TPL_ENABLE_CUDALIB) ## want to use cuda message("-- Enabled support for CUDA.") enable_language(CUDA) find_package(CUDA REQUIRED) -<<<<<<< HEAD - # if (NOT CMAKE_CUDA_FLAGS) - # cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS Auto) - # message("-- Cuda Flags : '${CUDA_ARCH_FLAGS}'") - # endif() -# if (CUDA_FOUND) -# if (NOT CMAKE_CUDA_FLAGS) -# cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS Auto) -# message("-- Cuda Flags : '${CUDA_ARCH_FLAGS}'") -# endif() -# set(CUDA_NVCC_FLAGS_RELEASE "-O3 --expt-relaxed-constexpr -DNDEBUG -g ${CMAKE_CUDA_FLAGS} ${CUDA_ARCH_FLAGS}") -# set(CUDA_NVCC_FLAGS_DEBUG "-O0 --expt-relaxed-constexpr -DDEBUG -g -G ${CMAKE_CUDA_FLAGS} ${CUDA_ARCH_FLAGS}") -# list(APPEND CMAKE_CUDA_FLAGS ${CUDA_ARCH_FLAGS}) -# message("-- Cuda Flags : '${CMAKE_CUDA_FLAGS}'") -# endif() -# # find_package(CUDAToolkit REQUIRED) - - message("-- Cuda libraries at : '${CUDA_LIBRARIES}'") -======= if (CUDA_FOUND) if (NOT CMAKE_CUDA_FLAGS) cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS Auto) @@ -206,7 +187,6 @@ if (TPL_ENABLE_CUDALIB) ## want to use cuda set(CMAKE_CUDA_FLAGS_DDEBUG "-O0 --expt-relaxed-constexpr -DDEBUG -g" CACHE STRING "") endif() ->>>>>>> 1a6d5bad87736c9e6f53530a63dce79856ecd76c # find_package(CUB REQUIRED) find_package(CUDAToolkit REQUIRED) diff --git a/example_scripts/batch_script_mpi_runit_cori_gpu_openmpi4.sh b/example_scripts/batch_script_mpi_runit_cori_gpu_openmpi4.sh index 15925205..248f85b0 100644 --- a/example_scripts/batch_script_mpi_runit_cori_gpu_openmpi4.sh +++ b/example_scripts/batch_script_mpi_runit_cori_gpu_openmpi4.sh @@ -10,31 +10,36 @@ module load cuda/11.1.1 module load openmpi/4.0.3 module load nsight-systems -export OMP_NUM_THREADS=5 -export NUM_GPU_STREAMS=1 -export SUPERLU_ACC_OFFLOAD=1 +export OMP_NUM_THREADS=1 +export MAX_BUFFER_SIZE=50000000 +export SUPERLU_NUM_GPU_STREAMS=1 +export SUPERLU_BIND_MPI_GPU=1 +export SUPERLU_ACC_OFFLOAD=1 # this can be 0 to do CPU tests on GPU nodes +export GPU3DVERSION=1 + # srun -n 1 ./EXAMPLE/pddrive -r 1 -c 1 ../EXAMPLE/g20.rua DIR=$CFS/ntrain9/YangLiu/matrix -export NSUP=128 -export NREL=20 +# export NSUP=128 +# export NREL=20 # export NSUP=256 # export NREL=256 # for MAT in big.rua # for MAT in g4.rua # for MAT in g20.rua -# for MAT in s1_mat_0_253872.bin +for MAT in s1_mat_0_126936.bin # for MAT in torso3.mtx # for MAT in Graphene2880/H.mtx # for MAT in s1_mat_0_126936.bin s1_mat_0_253872.bin s1_mat_0_507744.bin # for MAT in matrix_ACTIVSg70k_AC_00.mtx matrix_ACTIVSg10k_AC_00.mtx -for MAT in temp_13k.mtx temp_25k.mtx temp_75k.mtx +# for MAT in temp_13k.mtx temp_25k.mtx temp_75k.mtx do # srun -n 1 nsys profile --stats=true ./EXAMPLE/pddrive -r 1 -c 1 ../../matrix/$MAT # srun -n 1 ncu -k dlsum_bmod_inv_gpu_mrhs,dlsum_fmod_inv_gpu_mrhs --launch-count 1 --target-processes all ./EXAMPLE/pddrive -r 1 -c 1 ../../matrix/$MAT # srun -n 1 ncu -f -k dlsum_bmod_inv_gpu_mrhs --set full --launch-count 1 --target-processes all -o trisolve_u ./EXAMPLE/pddrive -r 1 -c 1 ../../matrix/$MAT # srun -n 1 ncu -f -k dlsum_fmod_inv_gpu_mrhs --set full --launch-count 1 --target-processes all -o trisolve_l "/project/projectdirs/m2957/liuyangz/my_research/superlu_dist_amd_mergefrom_master_12_01_2021/build/EXAMPLE/pddrive" -r 1 -c 1 ../../matrix/$MAT -srun -n 1 ./EXAMPLE/pddrive -r 1 -c 1 $DIR/$MAT -# srun -n 1 ./EXAMPLE/pddrive3d -r 1 -c 1 -d 1 $DIR/$MAT +srun -n 1 ./EXAMPLE/pddrive3d -r 1 -c 1 -d 1 $DIR/$MAT +# srun -n 1 ./EXAMPLE/pddrive3d -r 1 -c 1 -d 1 ../EXAMPLE/big.rua +# srun -n 1 ./EXAMPLE/pddrive -r 1 -c 1 $DIR/$MAT done diff --git a/example_scripts/run_cmake_build_cori_gpu_openmpi4.sh b/example_scripts/run_cmake_build_cori_gpu_openmpi4.sh index f048f30d..58a86700 100644 --- a/example_scripts/run_cmake_build_cori_gpu_openmpi4.sh +++ b/example_scripts/run_cmake_build_cori_gpu_openmpi4.sh @@ -1,23 +1,18 @@ #!/bin/bash # Bash script to submit many files to Cori/Edison/Queue -module unload cray-mpich -module swap PrgEnv-intel PrgEnv-gnu +module purge export MKLROOT=/opt/intel/compilers_and_libraries_2019.3.199/linux/mkl export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/compilers_and_libraries_2019.3.199/linux/mkl/lib/intel64 # module use /global/common/software/m3169/cori/modulefiles # module unload openmpi -module unload cmake -module load cmake + # module load cudatoolkit module load cgpu -<<<<<<< HEAD -module load cuda/10.2.89 -======= module load cuda/11.1.1 -module swap gcc gcc/8.3.0 ->>>>>>> master +module load gcc/8.3.0 module load openmpi/4.0.3 +module load cmake/3.22.1 # module load cuda # module load openmpi @@ -50,12 +45,10 @@ rm -rf DartConfiguration.tcl # -DCMAKE_CUDA_FLAGS="--disable-warnings -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -gencode arch=compute_70,code=sm_70 -I/usr/common/software/openmpi/4.0.3/gcc/8.3.0/cuda/10.2.89/include" - cmake .. \ -DCMAKE_BUILD_TYPE=Debug\ -Denable_complex16=OFF\ -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -<<<<<<< HEAD -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_CXX_COMPILER=mpic++ \ -DCMAKE_Fortran_COMPILER=mpif90 \ @@ -97,14 +90,6 @@ cmake .. \ make pddrive make pddrive3d -======= - -DCMAKE_CXX_FLAGS="-Ofast -DRELEASE ${INC_VTUNE} -I${CUDA_ROOT}/include" \ - -DCMAKE_C_FLAGS="-DGPU_SOLVE -std=c11 -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 ${INC_VTUNE} -I${CUDA_ROOT}/include" \ - -DCMAKE_CUDA_FLAGS="-lineinfo --disable-warnings -DPRNTlevel=1 -DPROFlevel=0 -DDEBUGlevel=0 -gencode arch=compute_70,code=sm_70 -I/usr/common/software/openmpi/4.0.3/gcc/8.3.0/cuda/10.2.89/include" -make pddrive -make pddrive3d -# make install ->>>>>>> master # -DTPL_BLAS_LIBRARIES="/opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/libmkl_intel_lp64.so;/opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/libmkl_sequential.so;/opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/libmkl_core.so" # -DTPL_BLAS_LIBRARIES="/opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/libmkl_intel_lp64.so;/opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/libmkl_sequential.so;/opt/intel/compilers_and_libraries_2017.2.174/linux/mkl/lib/intel64/libmkl_core.so" \