Skip to content

Commit

Permalink
Restore psdrive.c with double-precision IR;
Browse files Browse the repository at this point in the history
Declare	 psgsmv_comm_t{}::val_tosend as *void, accommodate double-prec IR;
Clean up compiler warning in CBLAS/ files.
  • Loading branch information
xiaoyeli committed May 21, 2022
1 parent cb6759b commit c84ddbe
Show file tree
Hide file tree
Showing 16 changed files with 212 additions and 64 deletions.
2 changes: 1 addition & 1 deletion CBLAS/dgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@

/* Quick return if possible. */

if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion CBLAS/dgemv.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@

/* Quick return if possible. */

if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion CBLAS/dsymv.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@

/* Quick return if possible. */

if (*n == 0 || *alpha == 0. && *beta == 1.) {
if (*n == 0 || (*alpha == 0. && *beta == 1.)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion CBLAS/sgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@

/* Quick return if possible. */

if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
if (*m == 0 || *n == 0 || ((*alpha == 0. || *k == 0) && *beta == 1.)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion CBLAS/sgemv.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@

/* Quick return if possible. */

if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion CBLAS/ssymv.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@

/* Quick return if possible. */

if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion CBLAS/zgemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@

/* Quick return if possible. */

if (*m == 0 || *n == 0 || ( (alpha->r == 0. && alpha->i == 0. || *k == 0) &&
if (*m == 0 || *n == 0 || ( ((alpha->r == 0. && alpha->i == 0.) || *k == 0) &&
(beta->r == 1. && beta->i == 0.) )) {
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion CBLAS/zher2.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@

/* Quick return if possible. */

if (*n == 0 || alpha->r == 0. && alpha->i == 0.) {
if (*n == 0 || (alpha->r == 0. && alpha->i == 0.)) {
return 0;
}

Expand Down
225 changes: 192 additions & 33 deletions EXAMPLE/psdrive.c
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ at the top-level directory.

#include <math.h>
#include "superlu_sdefs.h"
#include "superlu_ddefs.h"

/*! \brief
*
Expand Down Expand Up @@ -56,23 +57,44 @@ int main(int argc, char *argv[])
sLUstruct_t LUstruct;
sSOLVEstruct_t SOLVEstruct;
gridinfo_t grid;
float *berr;
float *err_bounds, *berr;
float *b, *xtrue;
int m, n;
int nprow, npcol, lookahead, colperm, rowperm, ir;
int nprow, npcol, lookahead, colperm, rowperm, ir, use_tensorcore, equil;
int iam, info, ldb, ldx, nrhs;
char **cpp, c, *postfix;;
FILE *fp, *fopen();
int cpp_defs();
int ii, omp_mpi_level;

double *dxtrue, *db, *dberr;

int ii, i, omp_mpi_level;

extern int screate_A_x_b(SuperMatrix *A, int nrhs, float **rhs,
int *ldb, float **x, int *ldx,
FILE *fp, char * postfix, gridinfo_t *grid);

extern void psgssvx_d2(superlu_dist_options_t *options, SuperMatrix *A,
sScalePermstruct_t *ScalePermstruct,
float B[], int ldb, int nrhs, gridinfo_t *grid,
sLUstruct_t *LUstruct, sSOLVEstruct_t *SOLVEstruct,
float *err_bounds, SuperLUStat_t *stat, int *info,
double *dxtrue);

extern void psgssvx_tracking(superlu_dist_options_t *options, SuperMatrix *A,
sScalePermstruct_t *ScalePermstruct,
float B[], int ldb, int nrhs, gridinfo_t *grid,
sLUstruct_t *LUstruct, sSOLVEstruct_t *SOLVEstruct, float *berr,
SuperLUStat_t *stat, int *info, double *xtrue);

nprow = 1; /* Default process rows. */
npcol = 1; /* Default process columns. */
nrhs = 1; /* Number of right-hand side. */
lookahead = -1;
colperm = -1;
rowperm = -1;
ir = -1;
equil = -1;
use_tensorcore = -1;

/* ------------------------------------------------------------
INITIALIZE MPI ENVIRONMENT.
Expand Down Expand Up @@ -105,14 +127,18 @@ int main(int argc, char *argv[])
break;
case 'c': npcol = atoi(*cpp);
break;
case 'l': lookahead = atoi(*cpp);
break;
case 'p': rowperm = atoi(*cpp);
break;
case 'q': colperm = atoi(*cpp);
break;
case 'i': ir = atoi(*cpp);
break;
case 'l': lookahead = atoi(*cpp);
break;
case 'p': rowperm = atoi(*cpp);
break;
case 'q': colperm = atoi(*cpp);
break;
case 'i': ir = atoi(*cpp);
break;
case 'e': equil = atoi(*cpp);
break;
case 't': use_tensorcore = atoi(*cpp);
break;
}
} else { /* Last arg is considered a filename */
if ( !(fp = fopen(*cpp, "r")) ) {
Expand Down Expand Up @@ -180,20 +206,8 @@ int main(int argc, char *argv[])
postfix = &((*cpp)[ii+1]);
}
}
// printf("%s\n", postfix);
//printf("%s\n", postfix);

/* ------------------------------------------------------------
GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE.
------------------------------------------------------------*/
screate_matrix_postfix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid);

if ( !(berr = floatMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");

/* ------------------------------------------------------------
NOW WE SOLVE THE LINEAR SYSTEM.
------------------------------------------------------------*/

/* Set the default input options:
options.Fact = DOFACT;
options.Equil = YES;
Expand All @@ -211,36 +225,173 @@ int main(int argc, char *argv[])
set_default_options_dist(&options);
options.IterRefine = SLU_SINGLE;
#if 0
options.RowPerm = LargeDiag_HWPM;
options.IterRefine = NOREFINE;
options.ReplaceTinyPivot = YES;
options.RowPerm = NOROWPERM;
options.ColPerm = NATURAL;
options.Equil = NO;
options.ReplaceTinyPivot = YES;
#endif

if (rowperm != -1) options.RowPerm = rowperm;
if (colperm != -1) options.ColPerm = colperm;
if (lookahead != -1) options.num_lookaheads = lookahead;
if (ir != -1) options.IterRefine = ir;
if (equil != -1) options.Equil = equil;
if (use_tensorcore != -1) options.Use_TensorCore = use_tensorcore;

if (!iam) {
print_sp_ienv_dist(&options);
print_options_dist(&options);
fflush(stdout);
}

/* ------------------------------------------------------------
GET THE MATRIX FROM FILE AND SETUP THE RIGHT HAND SIDE
------------------------------------------------------------*/
//screate_matrix_postfix(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid);

/* Generate a good RHS in double precision, then rounded to single.
See LAWN 165: bullet 7, page 20. */
/* The returned A, b and xtrue are in single precision
b <- A * xtrue in double internally, then rounded to single */
screate_A_x_b(&A, nrhs, &b, &ldb, &xtrue, &ldx, fp, postfix, &grid);
fclose(fp);

m = A.nrow;
n = A.ncol;

#if ( PRNTlevel>=1 )
if (iam==0) {
printf("\n(%d) generated single xtrue:\n", iam);
for (i = 0; i < 5; ++i) printf("%.16e\t", xtrue[i]);
printf("\n"); fflush(stdout);
}

/* Compute the ground truth dXtrue in double precision */
if ( options.IterRefine >= SLU_DOUBLE )
{
superlu_dist_options_t options_d;
SuperMatrix dA;
dScalePermstruct_t dScalePermstruct;
dLUstruct_t dLUstruct;
dSOLVEstruct_t dSOLVEstruct;
//extern int dcreate_matrix_postfix();

#if 0 // Shouldn't use double precision A
fp = fopen(*cpp, "r");
dcreate_matrix_postfix(&dA, nrhs, &db, &ldb, &dxtrue, &ldx, fp, postfix, &grid);
fclose(fp);
#else
/* Copy single-prec A into double-prec dA storage */
NRformat_loc *Astore = A.Store; // Single-prec A
int m_loc = Astore->m_loc;
int nnz_loc = Astore->nnz_loc;
float *nzval = (float*) Astore->nzval;
int_t *rowptr = Astore->rowptr;
int_t *colind = Astore->colind;

double *nzval_loc_dble = (double *) doubleMalloc_dist(nnz_loc);
int_t *colind_dble = (int_t *) intMalloc_dist(nnz_loc);
int_t *rowptr_dble = (int_t *) intMalloc_dist(m_loc + 1);

for (i = 0; i < nnz_loc; ++i) {
nzval_loc_dble[i] = nzval[i];
colind_dble[i] = colind[i];
}
for (i = 0; i < m_loc + 1; ++i) rowptr_dble[i] = rowptr[i];

dCreate_CompRowLoc_Matrix_dist(&dA, m, n, nnz_loc, m_loc, Astore->fst_row,
nzval_loc_dble, colind_dble, rowptr_dble,
SLU_NR_loc, SLU_D, SLU_GE);
#endif
/* Now, compute dxtrue via double-precision solver */
/* Why? dA is the double precision version of A, etc.
If db = dA*dXtrue to double, then rounding db to b and dA to A introduce
a perturbation of eps_single in A and b, so a perturbation of
cond(A)*eps_single in x vs dXtrue.
So need to use a computed Xtrue from a double code: dXtrue <- dA \ db,
this computed Xtrue would be comparable to x, i.e., having the same cond(A)
factor in the perturbation error. See bullet 7, page 20, LAWN 165.*/
if ( !(dberr = doubleMalloc_dist(nrhs)) )
ABORT("Malloc fails for dberr[].");
if ( !(dxtrue = doubleMalloc_dist(m * nrhs)) )
ABORT("Malloc fails for dberr[].");

set_default_options_dist(&options_d);
dScalePermstructInit(m, n, &dScalePermstruct);
dLUstructInit(n, &dLUstruct);
PStatInit(&stat);

/* Need to use correct single-prec {A,b} to solve */
db = doubleMalloc_dist(m_loc * nrhs);
for (i = 0; i < m_loc * nrhs; ++i) {
db[i] = b[i];
dxtrue[i] = (double) xtrue[i]; // generated truth in single
}

pdgssvx(&options_d, &dA, &dScalePermstruct, db, ldb, nrhs, &grid,
&dLUstruct, &dSOLVEstruct, dberr, &stat, &info);

pdinf_norm_error(iam, m_loc, nrhs, db, ldb, dxtrue, ldx, grid.comm);

for (i = 0; i < m_loc * nrhs; ++i) dxtrue[i] = db[i]; // computed truth in double

/* Rounded to single */
for (i = 0; i < m_loc; ++i) {
xtrue[i] = (float) db[i];
}

#if ( PRNTlevel>=1 )
if ( iam==0 ) { //(nprow*npcol-1) ) {
printf("\ndouble computed xtrue (stored in db):\n");
for (i = 0; i < 5; ++i) printf("%.16e\t", db[i]);
printf("\n"); fflush(stdout);
}
#endif

PStatPrint(&options_d, &stat, &grid); /* Print the statistics. */

PStatFree(&stat);
Destroy_CompRowLoc_Matrix_dist(&dA);
dScalePermstructFree(&dScalePermstruct);
dDestroy_LU(n, &grid, &dLUstruct);
dLUstructFree(&dLUstruct);
if ( options_d.SolveInitialized ) {
dSolveFinalize(&options_d, &dSOLVEstruct);
}
SUPERLU_FREE(db);
SUPERLU_FREE(dberr);
} /* end if IterRefine >= SLU_DOUBLE */
#endif

/* ------------------------------------------------------------
NOW WE SOLVE THE LINEAR SYSTEM in single precision
------------------------------------------------------------*/
if ( !(err_bounds = floatCalloc_dist(nrhs*3)) )
ABORT("Malloc fails for err_bounds[].");
if ( !(berr = floatMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");

/* Initialize ScalePermstruct and LUstruct. */
sScalePermstructInit(m, n, &ScalePermstruct);
sLUstructInit(n, &LUstruct);

/* Initialize the statistics variables. */
PStatInit(&stat);

/* Call the linear equation solver. */
psgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
&LUstruct, &SOLVEstruct, berr, &stat, &info);
if ( options.IterRefine == SLU_DOUBLE || options.IterRefine == SLU_EXTRA ) {
/* Call the linear equation solver with extra-precise iterative refinement */
psgssvx_d2(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
&LUstruct, &SOLVEstruct, err_bounds, &stat, &info, dxtrue);
} else {
/* Call the linear equation solver */
#if ( PRNTlevel>=2 )
psgssvx_tracking(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
&LUstruct, &SOLVEstruct, berr, &stat, &info, dxtrue);
#else
psgssvx(&options, &A, &ScalePermstruct, b, ldb, nrhs, &grid,
&LUstruct, &SOLVEstruct, berr, &stat, &info);
#endif
}

if ( info ) { /* Something is wrong */
if ( iam==0 ) {
Expand All @@ -251,8 +402,15 @@ int main(int argc, char *argv[])
/* Check the accuracy of the solution. */
psinf_norm_error(iam, ((NRformat_loc *)A.Store)->m_loc,
nrhs, b, ldb, xtrue, ldx, grid.comm);
if ( iam==0 && (options.IterRefine == SLU_DOUBLE || options.IterRefine == SLU_EXTRA) ) {
printf("** Forward error bounds:\n");
printf("\tNormwise: %e\n", err_bounds[0]);
printf("\tComponentwise: %e\n", err_bounds[1*nrhs]);
printf("** Componentwise backword error: %e\n", err_bounds[2*nrhs]);
fflush(stdout);
}
}

PStatPrint(&options, &stat, &grid); /* Print the statistics. */

/* ------------------------------------------------------------
Expand All @@ -267,8 +425,9 @@ int main(int argc, char *argv[])
sSolveFinalize(&options, &SOLVEstruct);
SUPERLU_FREE(b);
SUPERLU_FREE(xtrue);
SUPERLU_FREE(err_bounds);
SUPERLU_FREE(berr);
fclose(fp);
if ( options.IterRefine >= SLU_DOUBLE ) SUPERLU_FREE(dxtrue);

/* ------------------------------------------------------------
RELEASE THE SUPERLU PROCESS GRID.
Expand Down
Loading

0 comments on commit c84ddbe

Please sign in to comment.