From c84ddbef025fecd2cde74db5fd44d3501af949c6 Mon Sep 17 00:00:00 2001 From: Sherry Li Date: Sat, 21 May 2022 09:04:16 -0700 Subject: [PATCH] Restore psdrive.c with double-precision IR; Declare psgsmv_comm_t{}::val_tosend as *void, accommodate double-prec IR; Clean up compiler warning in CBLAS/ files. --- CBLAS/dgemm.c | 2 +- CBLAS/dgemv.c | 2 +- CBLAS/dsymv.c | 2 +- CBLAS/sgemm.c | 2 +- CBLAS/sgemv.c | 2 +- CBLAS/ssymv.c | 2 +- CBLAS/zgemm.c | 2 +- CBLAS/zher2.c | 2 +- EXAMPLE/psdrive.c | 225 +++++++++++++++++++++++++++++++++++++------- SRC/dreadtriple.c | 5 +- SRC/get_perm_c.c | 1 - SRC/psgsmv_d2.c | 14 --- SRC/sreadtriple.c | 5 +- SRC/superlu_defs.h | 1 + SRC/superlu_sdefs.h | 4 +- SRC/zreadtriple.c | 5 +- 16 files changed, 212 insertions(+), 64 deletions(-) mode change 100755 => 100644 EXAMPLE/psdrive.c diff --git a/CBLAS/dgemm.c b/CBLAS/dgemm.c index 59895409..122ccd60 100644 --- a/CBLAS/dgemm.c +++ b/CBLAS/dgemm.c @@ -234,7 +234,7 @@ /* Quick return if possible. */ - if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) { return 0; } diff --git a/CBLAS/dgemv.c b/CBLAS/dgemv.c index a418c072..79346196 100644 --- a/CBLAS/dgemv.c +++ b/CBLAS/dgemv.c @@ -150,7 +150,7 @@ /* Quick return if possible. */ - if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.)) { return 0; } diff --git a/CBLAS/dsymv.c b/CBLAS/dsymv.c index da8df4b1..5cfd7fed 100644 --- a/CBLAS/dsymv.c +++ b/CBLAS/dsymv.c @@ -145,7 +145,7 @@ /* Quick return if possible. */ - if (*n == 0 || *alpha == 0. && *beta == 1.) { + if (*n == 0 || (*alpha == 0. && *beta == 1.)) { return 0; } diff --git a/CBLAS/sgemm.c b/CBLAS/sgemm.c index b14683d5..6c7d7020 100644 --- a/CBLAS/sgemm.c +++ b/CBLAS/sgemm.c @@ -225,7 +225,7 @@ /* Quick return if possible. */ - if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) { return 0; } diff --git a/CBLAS/sgemv.c b/CBLAS/sgemv.c index 27238a6c..d7f9824c 100644 --- a/CBLAS/sgemv.c +++ b/CBLAS/sgemv.c @@ -150,7 +150,7 @@ /* Quick return if possible. */ - if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) { return 0; } diff --git a/CBLAS/ssymv.c b/CBLAS/ssymv.c index f58b37e0..6670c02b 100644 --- a/CBLAS/ssymv.c +++ b/CBLAS/ssymv.c @@ -145,7 +145,7 @@ /* Quick return if possible. */ - if (*n == 0 || *alpha == 0.f && *beta == 1.f) { + if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { return 0; } diff --git a/CBLAS/zgemm.c b/CBLAS/zgemm.c index 33db68c9..75b4852d 100644 --- a/CBLAS/zgemm.c +++ b/CBLAS/zgemm.c @@ -238,7 +238,7 @@ /* Quick return if possible. */ - if (*m == 0 || *n == 0 || ( (alpha->r == 0. && alpha->i == 0. || *k == 0) && + if (*m == 0 || *n == 0 || ( ((alpha->r == 0. && alpha->i == 0.) || *k == 0) && (beta->r == 1. && beta->i == 0.) )) { return 0; } diff --git a/CBLAS/zher2.c b/CBLAS/zher2.c index 30621ede..bd1e0219 100644 --- a/CBLAS/zher2.c +++ b/CBLAS/zher2.c @@ -153,7 +153,7 @@ /* Quick return if possible. */ - if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + if (*n == 0 || (alpha->r == 0. && alpha->i == 0.)) { return 0; } diff --git a/EXAMPLE/psdrive.c b/EXAMPLE/psdrive.c old mode 100755 new mode 100644 index c98d6847..7a6de166 --- a/EXAMPLE/psdrive.c +++ b/EXAMPLE/psdrive.c @@ -23,6 +23,7 @@ at the top-level directory. #include #include "superlu_sdefs.h" +#include "superlu_ddefs.h" /*! \brief * @@ -56,16 +57,35 @@ int main(int argc, char *argv[]) sLUstruct_t LUstruct; sSOLVEstruct_t SOLVEstruct; gridinfo_t grid; - float *berr; + float *err_bounds, *berr; float *b, *xtrue; int m, n; - int nprow, npcol, lookahead, colperm, rowperm, ir; + int nprow, npcol, lookahead, colperm, rowperm, ir, use_tensorcore, equil; int iam, info, ldb, ldx, nrhs; char **cpp, c, *postfix;; FILE *fp, *fopen(); int cpp_defs(); - int ii, omp_mpi_level; - + double *dxtrue, *db, *dberr; + + int ii, i, omp_mpi_level; + + extern int screate_A_x_b(SuperMatrix *A, int nrhs, float **rhs, + int *ldb, float **x, int *ldx, + FILE *fp, char * postfix, gridinfo_t *grid); + + extern void psgssvx_d2(superlu_dist_options_t *options, SuperMatrix *A, + sScalePermstruct_t *ScalePermstruct, + float B[], int ldb, int nrhs, gridinfo_t *grid, + sLUstruct_t *LUstruct, sSOLVEstruct_t *SOLVEstruct, + float *err_bounds, SuperLUStat_t *stat, int *info, + double *dxtrue); + + extern void psgssvx_tracking(superlu_dist_options_t *options, SuperMatrix *A, + sScalePermstruct_t *ScalePermstruct, + float B[], int ldb, int nrhs, gridinfo_t *grid, + sLUstruct_t *LUstruct, sSOLVEstruct_t *SOLVEstruct, float *berr, + SuperLUStat_t *stat, int *info, double *xtrue); + nprow = 1; /* Default process rows. */ npcol = 1; /* Default process columns. */ nrhs = 1; /* Number of right-hand side. */ @@ -73,6 +93,8 @@ int main(int argc, char *argv[]) colperm = -1; rowperm = -1; ir = -1; + equil = -1; + use_tensorcore = -1; /* ------------------------------------------------------------ INITIALIZE MPI ENVIRONMENT. @@ -105,14 +127,18 @@ int main(int argc, char *argv[]) break; case 'c': npcol = atoi(*cpp); break; - case 'l': lookahead = atoi(*cpp); - break; - case 'p': rowperm = atoi(*cpp); - break; - case 'q': colperm = atoi(*cpp); - break; - case 'i': ir = atoi(*cpp); - break; + case 'l': lookahead = atoi(*cpp); + break; + case 'p': rowperm = atoi(*cpp); + break; + case 'q': colperm = atoi(*cpp); + break; + case 'i': ir = atoi(*cpp); + break; + case 'e': equil = atoi(*cpp); + break; + case 't': use_tensorcore = atoi(*cpp); + break; } } else { /* Last arg is considered a filename */ if ( !(fp = fopen(*cpp, "r")) ) { @@ -180,20 +206,8 @@ int main(int argc, char *argv[]) postfix = &((*cpp)[ii+1]); } } - // printf("%s\n", postfix); + //printf("%s\n", postfix); - /* ------------------------------------------------------------ - GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. - ------------------------------------------------------------*/ - screate_matrix_postfix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid); - - if ( !(berr = floatMalloc_dist(nrhs)) ) - ABORT("Malloc fails for berr[]."); - - /* ------------------------------------------------------------ - NOW WE SOLVE THE LINEAR SYSTEM. - ------------------------------------------------------------*/ - /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; @@ -211,10 +225,9 @@ int main(int argc, char *argv[]) set_default_options_dist(&options); options.IterRefine = SLU_SINGLE; #if 0 - options.RowPerm = LargeDiag_HWPM; - options.IterRefine = NOREFINE; + options.ReplaceTinyPivot = YES; + options.RowPerm = NOROWPERM; options.ColPerm = NATURAL; - options.Equil = NO; options.ReplaceTinyPivot = YES; #endif @@ -222,15 +235,142 @@ int main(int argc, char *argv[]) if (colperm != -1) options.ColPerm = colperm; if (lookahead != -1) options.num_lookaheads = lookahead; if (ir != -1) options.IterRefine = ir; + if (equil != -1) options.Equil = equil; + if (use_tensorcore != -1) options.Use_TensorCore = use_tensorcore; if (!iam) { + print_sp_ienv_dist(&options); print_options_dist(&options); fflush(stdout); } + + /* ------------------------------------------------------------ + GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE + ------------------------------------------------------------*/ + //screate_matrix_postfix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid); + + /* Generate a good RHS in double precision, then rounded to single. + See LAWN 165: bullet 7, page 20. */ + /* The returned A, b and xtrue are in single precision + b <- A * xtrue in double internally, then rounded to single */ + screate_A_x_b(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid); + fclose(fp); m = A.nrow; n = A.ncol; +#if ( PRNTlevel>=1 ) + if (iam==0) { + printf("\n(%d) generated single xtrue:\n", iam); + for (i = 0; i < 5; ++i) printf("%.16e\t", xtrue[i]); + printf("\n"); fflush(stdout); + } + + /* Compute the ground truth dXtrue in double precision */ + if ( options.IterRefine >= SLU_DOUBLE ) + { + superlu_dist_options_t options_d; + SuperMatrix dA; + dScalePermstruct_t dScalePermstruct; + dLUstruct_t dLUstruct; + dSOLVEstruct_t dSOLVEstruct; + //extern int dcreate_matrix_postfix(); + +#if 0 // Shouldn't use double precision A + fp = fopen(*cpp, "r"); + dcreate_matrix_postfix(&dA, nrhs, &db, &ldb, &dxtrue, &ldx, fp, postfix, &grid); + fclose(fp); +#else + /* Copy single-prec A into double-prec dA storage */ + NRformat_loc *Astore = A.Store; // Single-prec A + int m_loc = Astore->m_loc; + int nnz_loc = Astore->nnz_loc; + float *nzval = (float*) Astore->nzval; + int_t *rowptr = Astore->rowptr; + int_t *colind = Astore->colind; + + double *nzval_loc_dble = (double *) doubleMalloc_dist(nnz_loc); + int_t *colind_dble = (int_t *) intMalloc_dist(nnz_loc); + int_t *rowptr_dble = (int_t *) intMalloc_dist(m_loc + 1); + + for (i = 0; i < nnz_loc; ++i) { + nzval_loc_dble[i] = nzval[i]; + colind_dble[i] = colind[i]; + } + for (i = 0; i < m_loc + 1; ++i) rowptr_dble[i] = rowptr[i]; + + dCreate_CompRowLoc_Matrix_dist(&dA, m, n, nnz_loc, m_loc, Astore->fst_row, + nzval_loc_dble, colind_dble, rowptr_dble, + SLU_NR_loc, SLU_D, SLU_GE); +#endif + /* Now, compute dxtrue via double-precision solver */ + /* Why? dA is the double precision version of A, etc. + If db = dA*dXtrue to double, then rounding db to b and dA to A introduce + a perturbation of eps_single in A and b, so a perturbation of + cond(A)*eps_single in x vs dXtrue. + So need to use a computed Xtrue from a double code: dXtrue <- dA \ db, + this computed Xtrue would be comparable to x, i.e., having the same cond(A) + factor in the perturbation error. See bullet 7, page 20, LAWN 165.*/ + if ( !(dberr = doubleMalloc_dist(nrhs)) ) + ABORT("Malloc fails for dberr[]."); + if ( !(dxtrue = doubleMalloc_dist(m * nrhs)) ) + ABORT("Malloc fails for dberr[]."); + + set_default_options_dist(&options_d); + dScalePermstructInit(m, n, &dScalePermstruct); + dLUstructInit(n, &dLUstruct); + PStatInit(&stat); + + /* Need to use correct single-prec {A,b} to solve */ + db = doubleMalloc_dist(m_loc * nrhs); + for (i = 0; i < m_loc * nrhs; ++i) { + db[i] = b[i]; + dxtrue[i] = (double) xtrue[i]; // generated truth in single + } + + pdgssvx(&options_d, &dA, &dScalePermstruct, db, ldb, nrhs, &grid, + &dLUstruct, &dSOLVEstruct, dberr, &stat, &info); + + pdinf_norm_error(iam, m_loc, nrhs, db, ldb, dxtrue, ldx, grid.comm); + + for (i = 0; i < m_loc * nrhs; ++i) dxtrue[i] = db[i]; // computed truth in double + + /* Rounded to single */ + for (i = 0; i < m_loc; ++i) { + xtrue[i] = (float) db[i]; + } + +#if ( PRNTlevel>=1 ) + if ( iam==0 ) { //(nprow*npcol-1) ) { + printf("\ndouble computed xtrue (stored in db):\n"); + for (i = 0; i < 5; ++i) printf("%.16e\t", db[i]); + printf("\n"); fflush(stdout); + } +#endif + + PStatPrint(&options_d, &stat, &grid); /* Print the statistics. */ + + PStatFree(&stat); + Destroy_CompRowLoc_Matrix_dist(&dA); + dScalePermstructFree(&dScalePermstruct); + dDestroy_LU(n, &grid, &dLUstruct); + dLUstructFree(&dLUstruct); + if ( options_d.SolveInitialized ) { + dSolveFinalize(&options_d, &dSOLVEstruct); + } + SUPERLU_FREE(db); + SUPERLU_FREE(dberr); + } /* end if IterRefine >= SLU_DOUBLE */ +#endif + + /* ------------------------------------------------------------ + NOW WE SOLVE THE LINEAR SYSTEM in single precision + ------------------------------------------------------------*/ + if ( !(err_bounds = floatCalloc_dist(nrhs*3)) ) + ABORT("Malloc fails for err_bounds[]."); + if ( !(berr = floatMalloc_dist(nrhs)) ) + ABORT("Malloc fails for berr[]."); + /* Initialize ScalePermstruct and LUstruct. */ sScalePermstructInit(m, n, &ScalePermstruct); sLUstructInit(n, &LUstruct); @@ -238,9 +378,20 @@ int main(int argc, char *argv[]) /* Initialize the statistics variables. */ PStatInit(&stat); - /* Call the linear equation solver. */ - psgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, - &LUstruct, &SOLVEstruct, berr, &stat, &info); + if ( options.IterRefine == SLU_DOUBLE || options.IterRefine == SLU_EXTRA ) { + /* Call the linear equation solver with extra-precise iterative refinement */ + psgssvx_d2(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, + &LUstruct, &SOLVEstruct, err_bounds, &stat, &info, dxtrue); + } else { + /* Call the linear equation solver */ +#if ( PRNTlevel>=2 ) + psgssvx_tracking(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, + &LUstruct, &SOLVEstruct, berr, &stat, &info, dxtrue); +#else + psgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid, + &LUstruct, &SOLVEstruct, berr, &stat, &info); +#endif + } if ( info ) { /* Something is wrong */ if ( iam==0 ) { @@ -251,8 +402,15 @@ int main(int argc, char *argv[]) /* Check the accuracy of the solution. */ psinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc, nrhs, b, ldb, xtrue, ldx, grid.comm); + if ( iam==0 && (options.IterRefine == SLU_DOUBLE || options.IterRefine == SLU_EXTRA) ) { + printf("** Forward error bounds:\n"); + printf("\tNormwise: %e\n", err_bounds[0]); + printf("\tComponentwise: %e\n", err_bounds[1*nrhs]); + printf("** Componentwise backword error: %e\n", err_bounds[2*nrhs]); + fflush(stdout); + } } - + PStatPrint(&options, &stat, &grid); /* Print the statistics. */ /* ------------------------------------------------------------ @@ -267,8 +425,9 @@ int main(int argc, char *argv[]) sSolveFinalize(&options, &SOLVEstruct); SUPERLU_FREE(b); SUPERLU_FREE(xtrue); + SUPERLU_FREE(err_bounds); SUPERLU_FREE(berr); - fclose(fp); + if ( options.IterRefine >= SLU_DOUBLE ) SUPERLU_FREE(dxtrue); /* ------------------------------------------------------------ RELEASE THE SUPERLU PROCESS GRID. diff --git a/SRC/dreadtriple.c b/SRC/dreadtriple.c index 523e2596..097988fb 100644 --- a/SRC/dreadtriple.c +++ b/SRC/dreadtriple.c @@ -82,14 +82,15 @@ dreadtriple_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, fscanf(fp, "%d%d%lf\n", &row[nz], &col[nz], &val[nz]); #endif - if ( nnz == 0 ) /* first nonzero */ + if ( nnz == 0 ) { /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else { printf("triplet file: row/col indices are one-based.\n"); } - + } + if ( !zero_base ) { /* Change to 0-based indexing. */ --row[nz]; diff --git a/SRC/get_perm_c.c b/SRC/get_perm_c.c index b2236e6f..fa8aa674 100644 --- a/SRC/get_perm_c.c +++ b/SRC/get_perm_c.c @@ -43,7 +43,6 @@ get_metis( int_t i, nm, numflag = 0; /* C-Style ordering */ int_t *perm, *iperm; int_t *b_colptr_int, *b_rowind_int; - extern int check_perm_dist(char *what, int_t n, int_t *perm); extern int METIS_NodeND(int_t*, int_t*, int_t*, int_t*, int_t*, int_t*, int_t*); diff --git a/SRC/psgsmv_d2.c b/SRC/psgsmv_d2.c index 43ad2dd2..5dde6444 100644 --- a/SRC/psgsmv_d2.c +++ b/SRC/psgsmv_d2.c @@ -389,17 +389,3 @@ psgsmv_d2( } /* PSGSMV_D2 */ -#if 0 /* This is defined elsewhere */ -void psgsmv_finalize(psgsmv_comm_t *gsmv_comm) -{ - int_t *it; - double *dt; - SUPERLU_FREE(gsmv_comm->extern_start); - if ( it = gsmv_comm->ind_tosend ) SUPERLU_FREE(it); - if ( it = gsmv_comm->ind_torecv ) SUPERLU_FREE(it); - SUPERLU_FREE(gsmv_comm->ptr_ind_tosend); - SUPERLU_FREE(gsmv_comm->SendCounts); - if ( dt = gsmv_comm->val_tosend ) SUPERLU_FREE(dt); - if ( dt = gsmv_comm->val_torecv ) SUPERLU_FREE(dt); -} -#endif diff --git a/SRC/sreadtriple.c b/SRC/sreadtriple.c index 7e10f4ac..6c554af1 100644 --- a/SRC/sreadtriple.c +++ b/SRC/sreadtriple.c @@ -82,14 +82,15 @@ sreadtriple_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, fscanf(fp, "%d%d%f\n", &row[nz], &col[nz], &val[nz]); #endif - if ( nnz == 0 ) /* first nonzero */ + if ( nnz == 0 ) { /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else { printf("triplet file: row/col indices are one-based.\n"); } - + } + if ( !zero_base ) { /* Change to 0-based indexing. */ --row[nz]; diff --git a/SRC/superlu_defs.h b/SRC/superlu_defs.h index 696f27f4..1f34272b 100644 --- a/SRC/superlu_defs.h +++ b/SRC/superlu_defs.h @@ -1201,6 +1201,7 @@ extern int get_acc_offload(); extern void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *); extern void check_repfnz_dist(int_t, int_t, int_t, int_t *); extern int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *); +extern int check_perm_dist(char *what, int_t n, int_t *perm); extern void PrintDouble5(char *, int_t, double *); extern void PrintInt10(char *, int_t, int_t *); extern void PrintInt32(char *, int, int *); diff --git a/SRC/superlu_sdefs.h b/SRC/superlu_sdefs.h index 4c71df93..e5d7780f 100644 --- a/SRC/superlu_sdefs.h +++ b/SRC/superlu_sdefs.h @@ -272,8 +272,8 @@ typedef struct { (also numbers of X values to be received) */ int *RecvCounts; /* Numbers of X indices to be received (also numbers of X values to be sent) */ - float *val_tosend; /* X values to be sent to other processes */ - float *val_torecv; /* X values to be received from other processes */ + void *val_tosend; /* X values to be sent to other processes */ + void *val_torecv; /* X values to be received from other processes */ int_t TotalIndSend; /* Total number of indices to be sent (also total number of values to be received) */ int_t TotalValSend; /* Total number of values to be sent. diff --git a/SRC/zreadtriple.c b/SRC/zreadtriple.c index d8e4c9a2..6f2ee105 100644 --- a/SRC/zreadtriple.c +++ b/SRC/zreadtriple.c @@ -81,14 +81,15 @@ zreadtriple_dist(FILE *fp, int_t *m, int_t *n, int_t *nonz, fscanf(fp, "%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i); #endif - if ( nnz == 0 ) /* first nonzero */ + if ( nnz == 0 ) { /* first nonzero */ if ( row[0] == 0 || col[0] == 0 ) { zero_base = 1; printf("triplet file: row/col indices are zero-based.\n"); } else { printf("triplet file: row/col indices are one-based.\n"); } - + } + if ( !zero_base ) { /* Change to 0-based indexing. */ --row[nz];