From d2f0f1e11a4b41c6bb8a1b80554e2339e76c9e8b Mon Sep 17 00:00:00 2001 From: Sherry Li Date: Mon, 27 Jan 2025 09:10:33 -0800 Subject: [PATCH] Fix a logic issue: when reusing the L&U structure, always call pxdistribute() to distribute A, even in the case of parallel symbolic factorization. --- SRC/complex16/pzgssvx.c | 10 ++++++---- SRC/double/pdgssvx.c | 13 ++++++++----- SRC/single/psgssvx.c | 11 ++++++----- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/SRC/complex16/pzgssvx.c b/SRC/complex16/pzgssvx.c index c5dc7e9b..99dc6bd5 100755 --- a/SRC/complex16/pzgssvx.c +++ b/SRC/complex16/pzgssvx.c @@ -1127,9 +1127,10 @@ pzgssvx(superlu_dist_options_t *options, SuperMatrix *A, if (symb_comm != MPI_COMM_NULL) MPI_Comm_free (&symb_comm); /* Distribute entries of A into L & U data structures. */ - //if (parSymbFact == NO || ???? Fact == SamePattern_SameRowPerm) { - if ( parSymbFact == NO ) { - /* CASE OF SERIAL SYMBOLIC */ + if (parSymbFact == NO || Fact == SamePattern_SameRowPerm) { + //if ( parSymbFact == NO ) { + /* Case of serial symbolic, + or parallel symbolic and reuse L&U structure, only redistribute A */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; @@ -1390,7 +1391,7 @@ pzgssvx(superlu_dist_options_t *options, SuperMatrix *A, - /* nvshmem related. 
*/ + /* nvshmem related */ #ifdef HAVE_NVSHMEM nsupers = Glu_persist->supno[n-1] + 1; int nc = CEILING( nsupers, grid->npcol); @@ -1405,6 +1406,7 @@ pzgssvx(superlu_dist_options_t *options, SuperMatrix *A, if (get_acc_solve()){ nv_init_wrapper(grid->comm); zprepare_multiGPU_buffers(flag_bc_size,flag_rd_size,ready_x_size,ready_lsum_size,my_flag_bc_size,my_flag_rd_size); + } #endif diff --git a/SRC/double/pdgssvx.c b/SRC/double/pdgssvx.c index 60f412a8..0d558aca 100755 --- a/SRC/double/pdgssvx.c +++ b/SRC/double/pdgssvx.c @@ -1126,9 +1126,10 @@ pdgssvx(superlu_dist_options_t *options, SuperMatrix *A, if (symb_comm != MPI_COMM_NULL) MPI_Comm_free (&symb_comm); /* Distribute entries of A into L & U data structures. */ - //if (parSymbFact == NO || ???? Fact == SamePattern_SameRowPerm) { - if ( parSymbFact == NO ) { - /* CASE OF SERIAL SYMBOLIC */ + if (parSymbFact == NO || Fact == SamePattern_SameRowPerm) { + //if ( parSymbFact == NO ) { + /* Case of serial symbolic, + or parallel symbolic and reuse L&U structure, only redistribute A */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; @@ -1165,6 +1166,7 @@ pdgssvx(superlu_dist_options_t *options, SuperMatrix *A, /*if (!iam) printf ("\tDISTRIBUTE time %8.2f\n", stat->utime[DIST]);*/ + /* Perform numerical factorization in parallel. */ t = SuperLU_timer_(); // #pragma omp parallel @@ -1384,7 +1386,7 @@ pdgssvx(superlu_dist_options_t *options, SuperMatrix *A, - /* nvshmem related. 
*/ + /* nvshmem related */ #ifdef HAVE_NVSHMEM nsupers = Glu_persist->supno[n-1] + 1; int nc = CEILING( nsupers, grid->npcol); @@ -1397,8 +1399,9 @@ pdgssvx(superlu_dist_options_t *options, SuperMatrix *A, int ready_x_size = maxrecvsz*nc; int ready_lsum_size = 2*maxrecvsz*nr; if (get_acc_solve()){ - nv_init_wrapper(grid->comm); + nv_init_wrapper(grid->comm); dprepare_multiGPU_buffers(flag_bc_size,flag_rd_size,ready_x_size,ready_lsum_size,my_flag_bc_size,my_flag_rd_size); + } #endif diff --git a/SRC/single/psgssvx.c b/SRC/single/psgssvx.c index c9e6a0b6..0a395525 100755 --- a/SRC/single/psgssvx.c +++ b/SRC/single/psgssvx.c @@ -1126,9 +1126,10 @@ psgssvx(superlu_dist_options_t *options, SuperMatrix *A, if (symb_comm != MPI_COMM_NULL) MPI_Comm_free (&symb_comm); /* Distribute entries of A into L & U data structures. */ - //if (parSymbFact == NO || ???? Fact == SamePattern_SameRowPerm) { - if ( parSymbFact == NO ) { - /* CASE OF SERIAL SYMBOLIC */ + if (parSymbFact == NO || Fact == SamePattern_SameRowPerm) { + //if ( parSymbFact == NO ) { + /* Case of serial symbolic, + or parallel symbolic and reuse L&U structure, only redistribute A */ /* Apply column permutation to the original distributed A */ for (j = 0; j < nnz_loc; ++j) colind[j] = perm_c[colind[j]]; @@ -1385,7 +1386,7 @@ psgssvx(superlu_dist_options_t *options, SuperMatrix *A, - /* nvshmem related.*/ + /* nvshmem related */ #ifdef HAVE_NVSHMEM nsupers = Glu_persist->supno[n-1] + 1; int nc = CEILING( nsupers, grid->npcol); @@ -1400,10 +1401,10 @@ psgssvx(superlu_dist_options_t *options, SuperMatrix *A, if (get_acc_solve()){ nv_init_wrapper(grid->comm); sprepare_multiGPU_buffers(flag_bc_size,flag_rd_size,ready_x_size,ready_lsum_size,my_flag_bc_size,my_flag_rd_size); + } #endif - } /* end if (!factored) */ if ( options->Fact == DOFACT || options->Fact == SamePattern ) {