Restore psdrive.c with double-precision IR;

Declare psgsmv_comm_t{}::val_tosend as *void, accommodate double-prec IR; Clean up compiler warning in CBLAS/ files.
xiaoyeli · May 21, 2022 · c84ddbe · c84ddbe
1 parent cb6759b
commit c84ddbe
Show file tree

Hide file tree

Showing 16 changed files with 212 additions and 64 deletions.
diff --git a/CBLAS/dgemm.c b/CBLAS/dgemm.c
@@ -234,7 +234,7 @@
 
 /*     Quick return if possible. */
 
-    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
+    if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) {
 	return 0;
     }
 

diff --git a/CBLAS/dgemv.c b/CBLAS/dgemv.c
@@ -150,7 +150,7 @@
 
 /*     Quick return if possible. */
 
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
+    if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.)) {
 	return 0;
     }
 

diff --git a/CBLAS/dsymv.c b/CBLAS/dsymv.c
@@ -145,7 +145,7 @@
 
 /*     Quick return if possible. */
 
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
+    if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
 	return 0;
     }
 

diff --git a/CBLAS/sgemm.c b/CBLAS/sgemm.c
@@ -225,7 +225,7 @@
 
 /*     Quick return if possible. */
 
-    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
+    if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) {
 	return 0;
     }
 

diff --git a/CBLAS/sgemv.c b/CBLAS/sgemv.c
@@ -150,7 +150,7 @@
 
 /*     Quick return if possible. */
 
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
+    if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) {
 	return 0;
     }
 

diff --git a/CBLAS/ssymv.c b/CBLAS/ssymv.c
@@ -145,7 +145,7 @@
 
 /*     Quick return if possible. */
 
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
+    if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
 	return 0;
     }
 

diff --git a/CBLAS/zgemm.c b/CBLAS/zgemm.c
@@ -238,7 +238,7 @@
 
 /*     Quick return if possible. */
 
-    if (*m == 0 || *n == 0 || ( (alpha->r == 0. && alpha->i == 0. || *k == 0) &&
+    if (*m == 0 || *n == 0 || ( ((alpha->r == 0. && alpha->i == 0.) || *k == 0) &&
 				(beta->r == 1. && beta->i == 0.) )) {
 	return 0;
     }

diff --git a/CBLAS/zher2.c b/CBLAS/zher2.c
@@ -153,7 +153,7 @@
 
 /*     Quick return if possible. */
 
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0.) {
+    if (*n == 0 || (alpha->r == 0. && alpha->i == 0.)) {
 	return 0;
     }
 

diff --git a/EXAMPLE/psdrive.c b/EXAMPLE/psdrive.c
@@ -23,6 +23,7 @@ at the top-level directory.
 
 #include <math.h>
 #include "superlu_sdefs.h"
+#include "superlu_ddefs.h"
 
 /*! \brief
  *
@@ -56,23 +57,44 @@ int main(int argc, char *argv[])
     sLUstruct_t LUstruct;
     sSOLVEstruct_t SOLVEstruct;
     gridinfo_t grid;
-    float   *berr;
+    float   *err_bounds, *berr;
     float   *b, *xtrue;
     int    m, n;
-    int      nprow, npcol, lookahead, colperm, rowperm, ir;
+    int      nprow, npcol, lookahead, colperm, rowperm, ir, use_tensorcore, equil;
     int      iam, info, ldb, ldx, nrhs;
     char     **cpp, c, *postfix;;
     FILE *fp, *fopen();
     int cpp_defs();
-    int ii, omp_mpi_level;
-
+    double *dxtrue, *db, *dberr;
+
+    int ii, i, omp_mpi_level;
+
+    extern int screate_A_x_b(SuperMatrix *A, int nrhs, float **rhs,
+			     int *ldb, float **x, int *ldx,
+			     FILE *fp, char * postfix, gridinfo_t *grid);
+
+    extern void psgssvx_d2(superlu_dist_options_t *options, SuperMatrix *A,
+			sScalePermstruct_t *ScalePermstruct,
+			float B[], int ldb, int nrhs, gridinfo_t *grid,
+			sLUstruct_t *LUstruct, sSOLVEstruct_t *SOLVEstruct,
+			float *err_bounds, SuperLUStat_t *stat, int *info,
+			double *dxtrue);
+
+    extern void psgssvx_tracking(superlu_dist_options_t *options, SuperMatrix *A,
+				 sScalePermstruct_t *ScalePermstruct,
+				 float B[], int ldb, int nrhs, gridinfo_t *grid,
+				 sLUstruct_t *LUstruct, sSOLVEstruct_t *SOLVEstruct, float *berr,
+				 SuperLUStat_t *stat, int *info, double *xtrue);
+
     nprow = 1;  /* Default process rows.      */
     npcol = 1;  /* Default process columns.   */
     nrhs = 1;   /* Number of right-hand side. */
     lookahead = -1;
     colperm = -1;
     rowperm = -1;
     ir = -1;
+    equil = -1;
+    use_tensorcore = -1;
 
     /* ------------------------------------------------------------
        INITIALIZE MPI ENVIRONMENT. 
@@ -105,14 +127,18 @@ int main(int argc, char *argv[])
 		        break;
 	      case 'c': npcol = atoi(*cpp);
 		        break;
-              case 'l': lookahead = atoi(*cpp);
-                        break;
-              case 'p': rowperm = atoi(*cpp);
-                        break;
-              case 'q': colperm = atoi(*cpp);
-                        break;
-              case 'i': ir = atoi(*cpp);
-                        break;
+	      case 'l': lookahead = atoi(*cpp);
+		        break;
+	      case 'p': rowperm = atoi(*cpp);
+		        break;
+	      case 'q': colperm = atoi(*cpp);
+		        break;
+	      case 'i': ir = atoi(*cpp);
+		        break;
+	      case 'e': equil = atoi(*cpp);
+		        break;
+	      case 't': use_tensorcore = atoi(*cpp);
+		        break;
 	    }
 	} else { /* Last arg is considered a filename */
 	    if ( !(fp = fopen(*cpp, "r")) ) {
@@ -180,20 +206,8 @@ int main(int argc, char *argv[])
 		postfix = &((*cpp)[ii+1]);
 	}
     }
-    // printf("%s\n", postfix);
+    //printf("%s\n", postfix);
 
-    /* ------------------------------------------------------------
-       GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE. 
-       ------------------------------------------------------------*/
-    screate_matrix_postfix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid);
-
-    if ( !(berr = floatMalloc_dist(nrhs)) )
-	ABORT("Malloc fails for berr[].");
-
-    /* ------------------------------------------------------------
-       NOW WE SOLVE THE LINEAR SYSTEM.
-       ------------------------------------------------------------*/
-
     /* Set the default input options:
         options.Fact              = DOFACT;
         options.Equil             = YES;
@@ -211,36 +225,173 @@ int main(int argc, char *argv[])
     set_default_options_dist(&options);
     options.IterRefine = SLU_SINGLE;
 #if 0
-    options.RowPerm = LargeDiag_HWPM;
-    options.IterRefine = NOREFINE;
+    options.ReplaceTinyPivot  = YES;
+    options.RowPerm = NOROWPERM;
     options.ColPerm = NATURAL;
-    options.Equil = NO; 
     options.ReplaceTinyPivot = YES;
 #endif
 
     if (rowperm != -1) options.RowPerm = rowperm;
     if (colperm != -1) options.ColPerm = colperm;
     if (lookahead != -1) options.num_lookaheads = lookahead;
     if (ir != -1) options.IterRefine = ir;
+    if (equil != -1) options.Equil = equil;
+    if (use_tensorcore != -1) options.Use_TensorCore = use_tensorcore;
 
     if (!iam) {
+	print_sp_ienv_dist(&options);
 	print_options_dist(&options);
 	fflush(stdout);
     }
+
+    /* ------------------------------------------------------------
+       GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE
+       ------------------------------------------------------------*/
+    //screate_matrix_postfix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid);
+
+    /* Generate a good RHS in double precision, then rounded to single.
+       See LAWN 165: bullet 7, page 20. */
+    /* The returned A, b and xtrue are in single precision
+       b <- A * xtrue in double internally, then rounded to single */
+    screate_A_x_b(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid);
+    fclose(fp);
 
     m = A.nrow;
     n = A.ncol;
 
+#if ( PRNTlevel>=1 )    
+    if (iam==0) {
+	  printf("\n(%d) generated single xtrue:\n", iam);
+	  for (i = 0; i < 5; ++i) printf("%.16e\t", xtrue[i]);
+	  printf("\n"); fflush(stdout);
+    }
+
+    /* Compute the ground truth dXtrue in double precision */
+    if ( options.IterRefine >= SLU_DOUBLE ) 
+    {
+        superlu_dist_options_t options_d;
+	SuperMatrix dA;
+	dScalePermstruct_t dScalePermstruct;
+	dLUstruct_t dLUstruct;
+	dSOLVEstruct_t dSOLVEstruct;
+	//extern int dcreate_matrix_postfix();
+
+#if 0 // Shouldn't use double precision A	
+	fp = fopen(*cpp, "r");
+	dcreate_matrix_postfix(&dA, nrhs, &db, &ldb, &dxtrue, &ldx, fp, postfix, &grid);
+	fclose(fp);
+#else
+	/* Copy single-prec A into double-prec dA storage */
+	NRformat_loc *Astore = A.Store;  // Single-prec A
+	int m_loc = Astore->m_loc;
+	int nnz_loc = Astore->nnz_loc;
+	float *nzval = (float*) Astore->nzval;
+	int_t *rowptr = Astore->rowptr;
+	int_t *colind = Astore->colind;
+
+	double *nzval_loc_dble = (double *) doubleMalloc_dist(nnz_loc);
+	int_t *colind_dble = (int_t *) intMalloc_dist(nnz_loc);
+	int_t *rowptr_dble = (int_t *) intMalloc_dist(m_loc + 1);
+
+	for (i = 0; i < nnz_loc; ++i) {
+	  nzval_loc_dble[i] = nzval[i];
+	  colind_dble[i] = colind[i];
+	}
+	for (i = 0; i < m_loc + 1; ++i) rowptr_dble[i] = rowptr[i];
+
+	dCreate_CompRowLoc_Matrix_dist(&dA, m, n, nnz_loc, m_loc, Astore->fst_row,
+				       nzval_loc_dble, colind_dble, rowptr_dble,
+				       SLU_NR_loc, SLU_D, SLU_GE);
+#endif
+	/* Now, compute dxtrue via double-precision solver */
+	/* Why? dA is the double precision version of A, etc.
+	   If db = dA*dXtrue to double, then rounding db to b and dA to A introduce
+	   a perturbation of eps_single in A and b, so a perturbation of 
+	   cond(A)*eps_single in x vs dXtrue. 
+	   So need to use a computed Xtrue from a double code: dXtrue <- dA \ db,
+	   this computed Xtrue would be comparable to x, i.e., having the same cond(A)
+	   factor in the perturbation error.   See bullet 7, page 20, LAWN 165.*/
+	if ( !(dberr = doubleMalloc_dist(nrhs)) )
+	  ABORT("Malloc fails for dberr[].");
+	if ( !(dxtrue = doubleMalloc_dist(m * nrhs)) )
+	  ABORT("Malloc fails for dberr[].");
+
+	set_default_options_dist(&options_d);
+	dScalePermstructInit(m, n, &dScalePermstruct);
+	dLUstructInit(n, &dLUstruct);
+	PStatInit(&stat);
+
+	/* Need to use correct single-prec {A,b} to solve  */
+	db = doubleMalloc_dist(m_loc * nrhs);
+	for (i = 0; i < m_loc * nrhs; ++i) {
+	  db[i] = b[i];
+	  dxtrue[i] = (double) xtrue[i]; // generated truth in single
+	}
+
+	pdgssvx(&options_d, &dA, &dScalePermstruct, db, ldb, nrhs, &grid,
+		&dLUstruct, &dSOLVEstruct, dberr, &stat, &info);
+
+	pdinf_norm_error(iam, m_loc, nrhs, db, ldb, dxtrue, ldx, grid.comm);
+
+	for (i = 0; i < m_loc * nrhs; ++i) dxtrue[i] = db[i]; // computed truth in double
+
+	/* Rounded to single */
+	for (i = 0; i < m_loc; ++i) {
+	  xtrue[i] = (float) db[i];
+	}
+
+#if ( PRNTlevel>=1 )	
+	if ( iam==0 ) { //(nprow*npcol-1) ) {
+	  printf("\ndouble computed xtrue (stored in db):\n");
+	  for (i = 0; i < 5; ++i) printf("%.16e\t", db[i]);
+	  printf("\n"); fflush(stdout);
+	}
+#endif
+
+	PStatPrint(&options_d, &stat, &grid); /* Print the statistics. */
+
+	PStatFree(&stat);
+	Destroy_CompRowLoc_Matrix_dist(&dA);
+	dScalePermstructFree(&dScalePermstruct);
+	dDestroy_LU(n, &grid, &dLUstruct);
+	dLUstructFree(&dLUstruct);
+	if ( options_d.SolveInitialized ) {
+	  dSolveFinalize(&options_d, &dSOLVEstruct);
+	}
+	SUPERLU_FREE(db);
+	SUPERLU_FREE(dberr);
+    } /* end if IterRefine >= SLU_DOUBLE */
+#endif
+
+    /* ------------------------------------------------------------
+       NOW WE SOLVE THE LINEAR SYSTEM in single precision
+       ------------------------------------------------------------*/
+    if ( !(err_bounds = floatCalloc_dist(nrhs*3)) )
+	ABORT("Malloc fails for err_bounds[].");
+    if ( !(berr = floatMalloc_dist(nrhs)) )
+	ABORT("Malloc fails for berr[].");
+
     /* Initialize ScalePermstruct and LUstruct. */
     sScalePermstructInit(m, n, &ScalePermstruct);
     sLUstructInit(n, &LUstruct);
 
     /* Initialize the statistics variables. */
     PStatInit(&stat);
 
-    /* Call the linear equation solver. */
-    psgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
-	    &LUstruct, &SOLVEstruct, berr, &stat, &info);
+    if ( options.IterRefine == SLU_DOUBLE || options.IterRefine == SLU_EXTRA ) { 
+        /* Call the linear equation solver with extra-precise iterative refinement */
+        psgssvx_d2(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
+		   &LUstruct, &SOLVEstruct, err_bounds, &stat, &info, dxtrue);
+    } else {
+        /* Call the linear equation solver */
+#if ( PRNTlevel>=2 )
+        psgssvx_tracking(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
+			 &LUstruct, &SOLVEstruct, berr, &stat, &info, dxtrue);
+#else
+        psgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
+		&LUstruct, &SOLVEstruct, berr, &stat, &info);
+#endif	
+    }
 
     if ( info ) {  /* Something is wrong */
         if ( iam==0 ) {
@@ -251,8 +402,15 @@ int main(int argc, char *argv[])
         /* Check the accuracy of the solution. */
         psinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc,
 		         nrhs, b, ldb, xtrue, ldx, grid.comm);
+	if ( iam==0 && (options.IterRefine == SLU_DOUBLE || options.IterRefine == SLU_EXTRA) ) {
+	  printf("** Forward error bounds:\n");
+	  printf("\tNormwise:       %e\n", err_bounds[0]);
+	  printf("\tComponentwise:  %e\n", err_bounds[1*nrhs]);
+	  printf("** Componentwise backword error: %e\n", err_bounds[2*nrhs]);
+	  fflush(stdout);
+	}
     }
-
+    
     PStatPrint(&options, &stat, &grid);        /* Print the statistics. */
 
     /* ------------------------------------------------------------
@@ -267,8 +425,9 @@ int main(int argc, char *argv[])
     sSolveFinalize(&options, &SOLVEstruct);
     SUPERLU_FREE(b);
     SUPERLU_FREE(xtrue);
+    SUPERLU_FREE(err_bounds);
     SUPERLU_FREE(berr);
-    fclose(fp);
+    if ( options.IterRefine >= SLU_DOUBLE ) SUPERLU_FREE(dxtrue);
 
     /* ------------------------------------------------------------
        RELEASE THE SUPERLU PROCESS GRID.