From f6023bda87d443b9b351f70314fda77cc7c9436d Mon Sep 17 00:00:00 2001 From: xiaoye Date: Sat, 23 Nov 2024 20:44:26 -0800 Subject: [PATCH] Implemnt user-defined supernodes in ilu_level_symbfact(). --- SRC/complex16/zutil_dist.c | 24 +++++ SRC/double/dutil_dist.c | 24 +++++ SRC/include/superlu_ddefs.h | 5 +- SRC/include/superlu_sdefs.h | 1 + SRC/include/superlu_zdefs.h | 1 + SRC/prec-independent/ilu_level_symbfact.c | 123 +++++++++++++++------- SRC/prec-independent/trfAux.c | 12 ++- SRC/single/sutil_dist.c | 24 +++++ make.inc.in | 3 + 9 files changed, 172 insertions(+), 45 deletions(-) diff --git a/SRC/complex16/zutil_dist.c b/SRC/complex16/zutil_dist.c index 0b21c44f..8fafdba7 100755 --- a/SRC/complex16/zutil_dist.c +++ b/SRC/complex16/zutil_dist.c @@ -191,6 +191,30 @@ void zPrint_CompCol_Matrix_dist(SuperMatrix *A) printf("\nend CompCol matrix.\n"); } +void zPrint_CompCol_triplet(SuperMatrix *A) +{ + NCformat *Astore = (NCformat *) A->Store; + register int i, j; + doublecomplex *dp; + int_t *colptr, *rowind; + + int n = A->nrow; + int_t nnz = Astore->nnz; + + printf("\nTriplet matrix:\n"); + printf("nrow %lld, ncol %lld, nnz %lld\n", n, (long long) A->ncol, nnz); + dp = (doublecomplex *) Astore->nzval; + colptr = Astore->colptr; + rowind = Astore->rowind; + + for (i = 0; i < A->ncol; ++i) { + for (j = colptr[i]; j < colptr[i+1]; ++j) { + printf("%8d %8d\t%f\t%f\n", rowind[j], i, dp[j].r, dp[j].i); + } + } + printf("\nend triplet matrix\n "); +} + void zPrint_Dense_Matrix_dist(SuperMatrix *A) { DNformat *Astore; diff --git a/SRC/double/dutil_dist.c b/SRC/double/dutil_dist.c index 6462950e..d251547b 100755 --- a/SRC/double/dutil_dist.c +++ b/SRC/double/dutil_dist.c @@ -192,6 +192,30 @@ void dPrint_CompCol_Matrix_dist(SuperMatrix *A) printf("\nend CompCol matrix.\n"); } +void dPrint_CompCol_triplet(SuperMatrix *A) +{ + NCformat *Astore = (NCformat *) A->Store; + register int i, j; + double *dp; + int_t *colptr, *rowind; + + int n = A->nrow; + int_t nnz = Astore->nnz; + + printf("\nTriplet matrix:\n"); + printf("nrow %lld, ncol %lld, nnz %lld\n", n, (long long) A->ncol, nnz); + dp = (double *) Astore->nzval; + colptr = Astore->colptr; + rowind = Astore->rowind; + + for (i = 0; i < A->ncol; ++i) { + for (j = colptr[i]; j < colptr[i+1]; ++j) { + printf("%8d %8d\t%f\n", rowind[j], i, dp[j]); + } + } + printf("\nend triplet matrix\n "); +} + void dPrint_Dense_Matrix_dist(SuperMatrix *A) { DNformat *Astore; diff --git a/SRC/include/superlu_ddefs.h b/SRC/include/superlu_ddefs.h index ea5dc345..69176564 100755 --- a/SRC/include/superlu_ddefs.h +++ b/SRC/include/superlu_ddefs.h @@ -484,7 +484,6 @@ typedef struct dlsumBmod_buff_t int_t *indCols; // }dlsumBmod_buff_t; - /*=====================*/ /*********************************************************************** @@ -539,8 +538,6 @@ extern int dcreate_matrix_dat(SuperMatrix *, int, double **, int *, double **, int *, FILE *, gridinfo_t *); extern int dcreate_matrix_postfix(SuperMatrix *, int, double **, int *, double **, int *, FILE *, char *, gridinfo_t *); -extern int dcreate_matrix_from_csc(SuperMatrix *A, - int_t m, int_t n, int_t nnz, int_t *rowind, int_t *colptr, double *nzval, gridinfo_t *grid); extern void dScalePermstructInit(const int_t, const int_t, dScalePermstruct_t *); @@ -1011,6 +1008,7 @@ extern void dPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, extern void dPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, dLocalLU_t *); extern void dPrint_CompCol_Matrix_dist(SuperMatrix *); +extern void dPrint_CompCol_triplet(SuperMatrix *); extern void dPrint_Dense_Matrix_dist(SuperMatrix *); extern int dPrint_CompRowLoc_Matrix_dist(SuperMatrix *); extern int file_dPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A); @@ -1625,7 +1623,6 @@ extern void dDumpUblocks3D(int_t nsupers, gridinfo3d_t *grid3d, extern double *dready_x; extern double *dready_lsum; - #ifdef __cplusplus } #endif diff --git a/SRC/include/superlu_sdefs.h b/SRC/include/superlu_sdefs.h index 1af66988..d033b3d9 100755 --- a/SRC/include/superlu_sdefs.h +++ b/SRC/include/superlu_sdefs.h @@ -1008,6 +1008,7 @@ extern void sPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, extern void sPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, sLocalLU_t *); extern void sPrint_CompCol_Matrix_dist(SuperMatrix *); +extern void sPrint_CompCol_triplet(SuperMatrix *); extern void sPrint_Dense_Matrix_dist(SuperMatrix *); extern int sPrint_CompRowLoc_Matrix_dist(SuperMatrix *); extern int file_sPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A); diff --git a/SRC/include/superlu_zdefs.h b/SRC/include/superlu_zdefs.h index 13c6539b..4d9da71d 100755 --- a/SRC/include/superlu_zdefs.h +++ b/SRC/include/superlu_zdefs.h @@ -1010,6 +1010,7 @@ extern void zPrintLblocks(int, int_t, gridinfo_t *, Glu_persist_t *, extern void zPrintUblocks(int, int_t, gridinfo_t *, Glu_persist_t *, zLocalLU_t *); extern void zPrint_CompCol_Matrix_dist(SuperMatrix *); +extern void zPrint_CompCol_triplet(SuperMatrix *); extern void zPrint_Dense_Matrix_dist(SuperMatrix *); extern int zPrint_CompRowLoc_Matrix_dist(SuperMatrix *); extern int file_zPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A); diff --git a/SRC/prec-independent/ilu_level_symbfact.c b/SRC/prec-independent/ilu_level_symbfact.c index 8e9a6344..b377ca70 100644 --- a/SRC/prec-independent/ilu_level_symbfact.c +++ b/SRC/prec-independent/ilu_level_symbfact.c @@ -31,7 +31,7 @@ at the top-level directory. * @param perm_c The column permutation vector. * @param Glu_persist Pointer to the structure which tracks the symbolic factorization information. * @param Glu_freeable Pointer to the structure which tracks the space used to store L/U data structures. - * @param stat Information on program execution. +n * @param stat Information on program execution. */ /* @@ -51,22 +51,27 @@ int_t ilu_level_symbfact Glu_freeable_t *Glu_freeable /* output */ ) { - if ( options->ILU_level != 0 ) { printf("ERROR: ILU(k>0) is not implemented yet\n"); return (0); } - + int iam = 0; + +#if (DEBUGlevel >= 1) + CHECK_MALLOC(iam, "Enter ilu_level_symbfact()"); +#endif + /* Now, do ILU(0) */ int_t iinfo; - int i, n = A->ncol, m=A->nrow; + int n = A->ncol, m = A->nrow; + int nsuper, i, j, fsupc; double t; - int min_mn = SUPERLU_MIN(m, n); + int min_mn = SUPERLU_MIN(m, n), irow; NCPformat *GACstore = A->Store; - int_t *colbeg, *colend, *rowind, irow; + int_t *colbeg, *colend, *rowind; rowind = GACstore->rowind; colbeg = GACstore->colbeg; colend = GACstore->colend; @@ -88,57 +93,95 @@ int_t ilu_level_symbfact /* User alreay allocated and defined the above supernode partition */ } - /* Count nonzeros per column for L & U */ - for (int j = 0; j < n; ++j) { -#if 0 - fscanf(fpU, "%d %d", &col_num, &Glu_freeable.usub[i]); - Glu_freeable.xusub[col_num] += 1; -#endif - for (i = colbeg[j]; i < colend[j]; ++i) { // (j,j) is diagonal - irow = rowind[i]; - if ( irow < j ) { // in U - nnzU++; - Glu_freeable->xusub[j+1] += 1; - } else { // in L, including diagonal of U - nnzL++; - Glu_freeable->xlsub[j+1] += 1; + /* Sherry: fix to accommodate supernodes (11/23/24) + Need to add an outer loop for supernodes + */ + int_t *xusub = Glu_freeable->xusub; + int_t *xlsub = Glu_freeable->xlsub; + int_t *xsup = Glu_persist->xsup; + int_t *supno = Glu_persist->supno; + int_t k, nextl = 0; + nsuper = (Glu_persist->supno)[n-1]; + + /* Count nonzeros per column for L & U; + Diagonal block of a supernode is stored in L + */ + for (i = 0; i <= nsuper; ++i) { /* loop through each supernode */ + fsupc = xsup[i]; + xlsub[fsupc] = nextl; + for (j = fsupc; j < xsup[i+1]; ++j) { // loop through columns in supernode i + for (k = colbeg[j]; k < colend[j]; ++k) { + irow = rowind[k]; + if ( irow < fsupc ) { // in U + nnzU++; + xusub[j+1] += 1; + } } } + + /* only check first column of supernode in L; + similar to fixupL_dist() + */ + for (k = colbeg[fsupc]; k < colend[fsupc]; ++k) { + irow = rowind[k]; + if ( irow >= fsupc ) ++nextl; // in L, including diagonal block + } + for (j = fsupc+1; j < xsup[i+1]; ++j) // loop through columns in supernode i + xlsub[j] = nextl; + nnzL += (nextl - xlsub[fsupc]) * (xsup[i+1]-fsupc); } + xlsub[n] = nextl; +#if ( PRNTlevel>=1 ) + printf(".... nnzL %d, nnzU %d, nextl %d, nsuper %d\n", nnzL, nnzU, nextl, nsuper); + fflush(stdout); +#endif + + /* Do prefix sum to set up column pointers */ + for(i = 1; i <= n; i++) { + //Glu_freeable->xlsub[i] += Glu_freeable->xlsub[i-1]; + Glu_freeable->xusub[i] += Glu_freeable->xusub[i-1]; + } + Glu_freeable->nnzLU = nnzU + nnzL; - Glu_freeable->lsub = (int_t *) SUPERLU_MALLOC(nnzL * sizeof(int_t)); - Glu_freeable->usub = (int_t *) SUPERLU_MALLOC(nnzU * sizeof(int_t)); Glu_freeable->nzlmax = nnzL; Glu_freeable->nzumax = nnzU; Glu_freeable->nnzLU = nnzL + nnzU - min_mn; - - /* YL: Assign lsub & usub */ - nnzL=0; - nnzU=0; - for (int j = 0; j < n; ++j) { - for (i = colbeg[j]; i < colend[j]; ++i) { // (j,j) is diagonal + Glu_freeable->lsub = (int_t *) intMalloc_dist(nnzL); + Glu_freeable->usub = (int_t *) intMalloc_dist(nnzU); + + /* YL: Assign usub & lsub */ + nnzU = 0; + for (j = 0; j < n; ++j) { // loop through each column + fsupc = xsup[supno[j]]; + for (i = colbeg[j]; i < colend[j]; ++i) { irow = rowind[i]; //if(j==0){ //printf("irow %5d \n",irow); //} - if ( irow < j ) { // in U - Glu_freeable->usub[nnzU] = irow; - nnzU++; - } else { // in L, including diagonal of U - // printf("%5d %5d\n",j,irow); - Glu_freeable->lsub[nnzL] = irow; - nnzL++; + if ( irow < fsupc ) { // in U + Glu_freeable->usub[nnzU++] = irow; } } } - - /* Do prefix sum to set up column pointers */ - for(i = 1; i <= n; i++) { - Glu_freeable->xlsub[i] += Glu_freeable->xlsub[i-1]; - Glu_freeable->xusub[i] += Glu_freeable->xusub[i-1]; + + /* only check first column of supernode in L; + similar to fixupL_dist() + */ + nnzL = 0; + for (i = 0; i <= nsuper; ++i) { // loop through each supernode + fsupc = xsup[i]; + for (k = colbeg[fsupc]; k < colend[fsupc]; ++k) { + irow = rowind[k]; + if ( irow >= fsupc ) { // in L, including diagonal block + Glu_freeable->lsub[nnzL++] = irow; + } + } } +#if (DEBUGlevel >= 1) + CHECK_MALLOC(iam, "Exit ilu_level_symbfact()"); +#endif return ( -(Glu_freeable->xlsub[n] + Glu_freeable->xusub[n]) ); } diff --git a/SRC/prec-independent/trfAux.c b/SRC/prec-independent/trfAux.c index 2529b1ba..69eabbbd 100755 --- a/SRC/prec-independent/trfAux.c +++ b/SRC/prec-independent/trfAux.c @@ -237,7 +237,7 @@ int_t* getPerm_c_supno(int_t nsupers, superlu_dist_options_t *options, { /*I do not understand the following code in detail, - I have just written a wrapper around it*/ + I have just written a wrapper around it (Piyush?) */ int_t* perm_c_supno; //Glu_persist_t *Glu_persist = LUstruct->Glu_persist; @@ -265,6 +265,12 @@ int_t* getPerm_c_supno(int_t nsupers, superlu_dist_options_t *options, nblocks = 0; ncb = nsupers / Pc; nrb = nsupers / Pr; + + +#if (DEBUGlevel >= 1) + CHECK_MALLOC(iam, "Enter getPerm_c_supno()"); +#endif + /* ================================================== * * static scheduling of j-th step of LU-factorization * * ================================================== */ @@ -1164,7 +1170,11 @@ int_t* getPerm_c_supno(int_t nsupers, superlu_dist_options_t *options, * end of static scheduling * * ======================== */ +#if (DEBUGlevel >= 1) + CHECK_MALLOC(iam, "Exit getPerm_c_supno()"); +#endif return perm_c_supno; + } /* getPerm_c_supno */ diff --git a/SRC/single/sutil_dist.c b/SRC/single/sutil_dist.c index 0cde783f..b4bddcbf 100755 --- a/SRC/single/sutil_dist.c +++ b/SRC/single/sutil_dist.c @@ -192,6 +192,30 @@ void sPrint_CompCol_Matrix_dist(SuperMatrix *A) printf("\nend CompCol matrix.\n"); } +void sPrint_CompCol_triplet(SuperMatrix *A) +{ + NCformat *Astore = (NCformat *) A->Store; + register int i, j; + float *dp; + int_t *colptr, *rowind; + + int n = A->nrow; + int_t nnz = Astore->nnz; + + printf("\nTriplet matrix:\n"); + printf("nrow %lld, ncol %lld, nnz %lld\n", n, (long long) A->ncol, nnz); + dp = (float *) Astore->nzval; + colptr = Astore->colptr; + rowind = Astore->rowind; + + for (i = 0; i < A->ncol; ++i) { + for (j = colptr[i]; j < colptr[i+1]; ++j) { + printf("%8d %8d\t%f\n", rowind[j], i, dp[j]); + } + } + printf("\nend triplet matrix\n "); +} + void sPrint_Dense_Matrix_dist(SuperMatrix *A) { DNformat *Astore; diff --git a/make.inc.in b/make.inc.in index 4c9b272d..be2e48ed 100644 --- a/make.inc.in +++ b/make.inc.in @@ -47,6 +47,9 @@ LIBS += ${COMBBLAS_LIB_EXPORT} LIBS += ${EXTRA_LIB_EXPORT} #LIBS += ${CUDA_LIB_EXPORT} +# Perlmutter +# CUDALIBS = /opt/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/12.2/lib64/libcudart.so /opt/nvidia/hpc_sdk/Linux_x86_64/23.9/math_libs/12.2/lib64/libcusolver.so /opt/nvidia/hpc_sdk/Linux_x86_64/23.9/math_libs/12.2/lib64/libcusparse.so + CUDALIBS = ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUSOLVER_LIBRARIES} ${CUDA_CUSPARSE_LIBRARIES} LIBS += $(CUDALIBS)