diff --git a/MAKE_INC/make.mac-x b/MAKE_INC/make.mac-x index ccef91dc..c835f82c 100644 --- a/MAKE_INC/make.mac-x +++ b/MAKE_INC/make.mac-x @@ -23,7 +23,10 @@ BLASLIB = $(SuperLUroot)/CBLAS/libblas.a # LAPACKLIB = # SLU_HAVE_LAPACK = TRUE -LIBS = $(DSUPERLULIB) ${BLASLIB} /Users/xsli/lib/parmetis-4.0.3/build/Darwin-x86_64/libparmetis/libparmetis.a /Users/xsli/lib/parmetis-4.0.3/build/Darwin-x86_64/libmetis/libmetis.a $(LAPACKLIB) +HAVE_PARMETIS = TRUE +PARMETIS_ROOT=/Users/xsli/Dropbox/xsli-lib/static/parmetis-4.0.3 + +LIBS = $(DSUPERLULIB) ${BLASLIB} ${PARMETIS_ROOT}/build/Darwin-x86_64/libparmetis/libparmetis.a ${PARMETIS_ROOT}/build/Darwin-x86_64/libmetis/libmetis.a $(LAPACKLIB) # # The archiver and the flag(s) to use when building archive (library) @@ -33,8 +36,8 @@ ARCH = /usr/bin/ar ARCHFLAGS = cr RANLIB = /usr/bin/ranlib -CC = /Users/xsli/lib/mpich2-install/bin/mpicc -CFLAGS = -O3 -DNDEBUG -I/Users/xsli/lib/parmetis-4.0.3/metis/include -I/Users/xsli/lib/parmetis-4.0.3/include -DDEBUGlevel=0 -DPRNTlevel=0 -std=c99 -g +CC = mpicc ##/Users/xsli/lib/mpich2-install/bin/mpicc +CFLAGS = -O3 -DNDEBUG -I${INCLUDEDIR} -I${PARMETIS_ROOT}/metis/include -I${PARMETIS_ROOT}/include -DDEBUGlevel=0 -DPRNTlevel=0 -std=c99 -g #CFLAGS += -openmp #XSDK_INDEX_SIZE = 64 ## 64-bit integer # CFLAGS += diff --git a/SRC/Makefile b/SRC/Makefile index eff0b558..f9192099 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -36,11 +36,11 @@ include ../make.inc # Precision independent routines # ALLAUX = sp_ienv.o etree.o sp_colorder.o get_perm_c.o \ - colamd.o mmd.o comm.o memory.o util.o superlu_grid.o \ - pxerr_dist.o superlu_timer.o symbfact.o \ - psymbfact.o psymbfact_util.o get_perm_c_parmetis.o mc64ad_dist.o \ - xerr_dist.o smach_dist.o dmach_dist.o \ - superlu_dist_version.o TreeInterface.o + colamd.o mmd.o comm.o memory.o util.o gpu_api_utils.o superlu_grid.o \ + pxerr_dist.o superlu_timer.o symbfact.o psymbfact.o psymbfact_util.o \ + get_perm_c_parmetis.o mc64ad_dist.o xerr_dist.o smach_dist.o dmach_dist.o \ + superlu_dist_version.o comm_tree.o + # Following are from 3D code ALLAUX += superlu_grid3d.o supernodal_etree.o supernodalForest.o \ trfAux.o communication_aux.o treeFactorization.o sec_structs.o @@ -52,60 +52,61 @@ SSLUSRC = slangs_dist.o sgsequ_dist.o slaqgs_dist.o sutil_dist.o \ DSLUSRC = dlangs_dist.o dgsequ_dist.o dlaqgs_dist.o dutil_dist.o \ dmemory_dist.o dmyblas2_dist.o dsp_blas2_dist.o dsp_blas3_dist.o ZSLUSRC = dcomplex_dist.o zlangs_dist.o zgsequ_dist.o zlaqgs_dist.o \ - zutil_dist.o zmemory_dist.o zmyblas2_dist.o \ - zsp_blas2_dist.o zsp_blas3_dist.o - + zutil_dist.o zmemory_dist.o zmyblas2_dist.o zsp_blas2_dist.o zsp_blas3_dist.o # # Routines for single precision parallel SuperLU -SPLUSRC = psgssvx.o psgssvx_ABglobal.o \ - sreadhb.o sreadrb.o sreadtriple.o sreadMM.o sbinary_io.o \ +SPLUSRC = psgssvx.o psgssvx_d2.o psgssvx_ABglobal.o \ + sreadhb.o sreadrb.o sreadtriple.o sreadtriple_noheader.o sreadMM.o sbinary_io.o \ psgsequ.o pslaqgs.o sldperm_dist.o pslangs.o psutil.o \ pssymbfact_distdata.o sdistribute.o psdistribute.o \ psgstrf.o sstatic_schedule.o psgstrf2.o psGetDiagU.o \ psgstrs.o psgstrs1.o psgstrs_lsum.o psgstrs_Bglobal.o \ - psgsrfs.o psgsmv.o psgsrfs_ABXglobal.o psgsmv_AXglobal.o \ - sreadtriple_noheader.o \ - psgssvx_d2.o psgsrfs_d2.o psgsmv_d2.o psgsequb.o + psgsrfs.o psgsmv.o psgsrfs_ABXglobal.o psgsmv_AXglobal.o ssuperlu_blas.o \ + psgsrfs_d2.o psgsmv_d2.o psgsequb.o +# from 3D code +SPLUSRC += psgssvx3d.o snrformat_loc3d.o psgstrf3d.o streeFactorization.o \ + streeFactorizationGPU.o sscatter3d.o sgather.o ps3dcomm.o strfAux.o \ + scommunication_aux.o strfCommWrapper.o # # Routines for double precision parallel SuperLU DPLUSRC = pdgssvx.o pdgssvx_ABglobal.o \ - dreadhb.o dreadrb.o dreadtriple.o dreadMM.o dbinary_io.o \ + dreadhb.o dreadrb.o dreadtriple.o dreadtriple_noheader.o dreadMM.o dbinary_io.o \ pdgsequ.o pdlaqgs.o dldperm_dist.o pdlangs.o pdutil.o \ pdsymbfact_distdata.o ddistribute.o pddistribute.o \ pdgstrf.o dstatic_schedule.o pdgstrf2.o pdGetDiagU.o \ pdgstrs.o pdgstrs1.o pdgstrs_lsum.o pdgstrs_Bglobal.o \ - pdgsrfs.o pdgsmv.o pdgsrfs_ABXglobal.o pdgsmv_AXglobal.o \ - dreadtriple_noheader.o dsuperlu_blas.o + pdgsrfs.o pdgsmv.o pdgsrfs_ABXglobal.o pdgsmv_AXglobal.o dsuperlu_blas.o # from 3D code -DPLUSRC += pdgssvx3d.o pdgstrf3d.o dtreeFactorization.o dscatter3d.o \ - dgather.o pd3dcomm.o dtrfAux.o dcommunication_aux.o dtrfCommWrapper.o \ - dnrformat_loc3d.o dtreeFactorizationGPU.o ##$(FACT3D) +DPLUSRC += pdgssvx3d.o dnrformat_loc3d.o pdgstrf3d.o dtreeFactorization.o \ + dtreeFactorizationGPU.o dscatter3d.o dgather.o pd3dcomm.o dtrfAux.o \ + dcommunication_aux.o dtrfCommWrapper.o # # Routines for double complex parallel SuperLU ZPLUSRC = pzgssvx.o pzgssvx_ABglobal.o \ - zreadhb.o zreadrb.o zreadtriple.o zreadMM.o zbinary_io.o\ + zreadhb.o zreadrb.o zreadtriple.o zreadMM.o zreadtriple_noheader.o zbinary_io.o\ pzgsequ.o pzlaqgs.o zldperm_dist.o pzlangs.o pzutil.o \ pzsymbfact_distdata.o zdistribute.o pzdistribute.o \ pzgstrf.o zstatic_schedule.o pzgstrf2.o pzGetDiagU.o \ pzgstrs.o pzgstrs1.o pzgstrs_lsum.o pzgstrs_Bglobal.o \ - pzgsrfs.o pzgsmv.o pzgsrfs_ABXglobal.o pzgsmv_AXglobal.o \ - zreadtriple_noheader.o zsuperlu_blas.o + pzgsrfs.o pzgsmv.o pzgsrfs_ABXglobal.o pzgsmv_AXglobal.o zsuperlu_blas.o # from 3D code -ZPLUSRC += pzgssvx3d.o pzgstrf3d.o ztreeFactorization.o zscatter3d.o \ - zgather.o pz3dcomm.o ztrfAux.o zcommunication_aux.o ztrfCommWrapper.o \ - znrformat_loc3d.o ztreeFactorizationGPU.o ##$(FACT3D) +ZPLUSRC += pzgssvx3d.o znrformat_loc3d.o pzgstrf3d.o ztreeFactorization.o \ + ztreeFactorizationGPU.o zscatter3d.o zgather.o pz3dcomm.o ztrfAux.o \ + zcommunication_aux.o ztrfCommWrapper.o ifeq ($(HAVE_CUDA),TRUE) -ALLAUX += cublas_utils.o superlu_gpu_utils.o pdgstrs_lsum_cuda.o -DPLUSRC += dsuperlu_gpu.o +ALLAUX += superlu_gpu_utils.o +SPLUSRC += ssuperlu_gpu.o +DPLUSRC += dsuperlu_gpu.o pdgstrs_lsum_cuda.o ZPLUSRC += zsuperlu_gpu.o endif ifeq ($(HAVE_COMBBLAS),TRUE) -DPLUSRC += d_c2cpp_GetHWPM.o -ZPLUSRC += z_c2cpp_GetHWPM.o +SPLUSRC += s_c2cpp_GetHWPM.o sHWPM_CombBLAS.hpp +DPLUSRC += d_c2cpp_GetHWPM.o dHWPM_CombBLAS.hpp +ZPLUSRC += z_c2cpp_GetHWPM.o zHWPM_CombBLAS.hpp endif all: single double complex16 diff --git a/SRC/superlu_dist_config.h b/SRC/superlu_dist_config.h index a477e6e1..ec3d9f9a 100644 --- a/SRC/superlu_dist_config.h +++ b/SRC/superlu_dist_config.h @@ -1,23 +1,7 @@ -/* superlu_dist_config.h.in */ - -/* Enable CUDA */ -/* #undef HAVE_CUDA */ - -/* Enable HIP */ -/* #undef HAVE_HIP */ - -/* Enable parmetis */ +/* #define XSDK_INDEX_SIZE 64 */ +/* #define SLU_HAVE_LAPACK TRUE */ #define HAVE_PARMETIS TRUE - -/* Enable LAPACK */ -/* #undef SLU_HAVE_LAPACK */ - -/* Enable CombBLAS */ -/* #undef HAVE_COMBBLAS */ - -/* enable 64bit index mode */ -/* #undef XSDK_INDEX_SIZE */ - +/* #define HAVE_COMBBLAS TRUE */ #if (XSDK_INDEX_SIZE == 64) #define _LONGINT 1 #endif