Skip to content

Commit

Permalink
Merge branch 'amd' of https://github.com/xiaoyeli/superlu_dist into amd
Browse files (browse the repository at this point in the history)
  • Loading branch information
liuyangzhuan committed Dec 1, 2021
2 parents (f892d06 + eac6cda), commit 098e96f
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 33 deletions.
27 changes: 19 additions & 8 deletions SRC/ddistribute.c
Original file line number Diff line number Diff line change
Expand Up @@ -778,14 +778,25 @@ ddistribute(fact_t fact, int_t n, SuperMatrix *A,
ABORT("Malloc fails for Lindval_loc_bc_ptr[ljb][]");
Lindval_loc_bc_offset[ljb]=nrbl*3;
Lindval_loc_bc_cnt += Lindval_loc_bc_offset[ljb];
if (!(Linv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Linv_bc_ptr[ljb][]");
Linv_bc_offset[ljb]=nsupc*nsupc;
Linv_bc_cnt += Linv_bc_offset[ljb];
if (!(Uinv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Uinv_bc_ptr[ljb][]");
Uinv_bc_offset[ljb]=nsupc*nsupc;
Uinv_bc_cnt += Uinv_bc_offset[ljb];

myrow = MYROW( iam, grid );
krow = PROW( jb, grid );
if(myrow==krow){ /* diagonal block */
if (!(Linv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Linv_bc_ptr[ljb][]");
Linv_bc_offset[ljb]=nsupc*nsupc;
Linv_bc_cnt += Linv_bc_offset[ljb];
if (!(Uinv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Uinv_bc_ptr[ljb][]");
Uinv_bc_offset[ljb]=nsupc*nsupc;
Uinv_bc_cnt += Uinv_bc_offset[ljb];
}else{
Linv_bc_ptr[ljb] = NULL;
Linv_bc_offset[ljb] = -1;
Uinv_bc_ptr[ljb] = NULL;
Uinv_bc_offset[ljb] = -1;
}

mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 );
mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc );
mybufmax[4] = SUPERLU_MAX( mybufmax[4], len );
Expand Down
30 changes: 19 additions & 11 deletions SRC/pddistribute.c
Original file line number Diff line number Diff line change
Expand Up @@ -1104,17 +1104,25 @@ pddistribute(fact_t fact, int_t n, SuperMatrix *A,
Lindval_loc_bc_offset[ljb]=nrbl*3;
Lindval_loc_bc_cnt += Lindval_loc_bc_offset[ljb];


if (!(Linv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Linv_bc_ptr[ljb][]");
Linv_bc_offset[ljb]=nsupc*nsupc;
Linv_bc_cnt += Linv_bc_offset[ljb];

if (!(Uinv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Uinv_bc_ptr[ljb][]");
Uinv_bc_offset[ljb]=nsupc*nsupc;
Uinv_bc_cnt += Uinv_bc_offset[ljb];

myrow = MYROW( iam, grid );
krow = PROW( jb, grid );
if(myrow==krow){ /* diagonal block */
if (!(Linv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Linv_bc_ptr[ljb][]");
Linv_bc_offset[ljb]=nsupc*nsupc;
Linv_bc_cnt += Linv_bc_offset[ljb];

if (!(Uinv_bc_ptr[ljb] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Uinv_bc_ptr[ljb][]");
Uinv_bc_offset[ljb]=nsupc*nsupc;
Uinv_bc_cnt += Uinv_bc_offset[ljb];
}else{
Linv_bc_ptr[ljb] = NULL;
Linv_bc_offset[ljb] = -1;
Uinv_bc_ptr[ljb] = NULL;
Uinv_bc_offset[ljb] = -1;
}

mybufmax[0] = SUPERLU_MAX( mybufmax[0], len1 );
mybufmax[1] = SUPERLU_MAX( mybufmax[1], len*nsupc );
mybufmax[4] = SUPERLU_MAX( mybufmax[4], len );
Expand Down
9 changes: 4 additions & 5 deletions SRC/pdgstrs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,10 @@ pdgstrs(int_t n, dLUstruct_t *LUstruct,
const int warp_size = 32; /* number of threads per warp*/
gpuStream_t sid=0;
int gid=0;
gridinfo_t *d_grid = NULL;
double *d_x = NULL;
double *d_lsum = NULL;
int_t *d_fmod = NULL;
#endif
#endif

Expand Down Expand Up @@ -1884,11 +1888,6 @@ t1 = SuperLU_timer_();
// roctxRangePush("hipLaunchKernel");
// #endif

gridinfo_t *d_grid = NULL;
double *d_x = NULL;
double *d_lsum = NULL;
int_t *d_fmod = NULL;

checkGPU(gpuMalloc( (void**)&d_grid, sizeof(gridinfo_t)));

checkGPU(gpuMalloc( (void**)&recvbuf_BC_gpu, maxrecvsz* CEILING( nsupers, grid->npcol) * sizeof(double))); // used for receiving and forwarding x on each thread
Expand Down
2 changes: 1 addition & 1 deletion SRC/pdgstrs_lsum_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1587,7 +1587,7 @@ __device__ void C_RdTree_forwardMessageSimple_Device(C_Tree* Tree, void* localBu

// the first nbcol_loc handles all computations and broadcast communication
if(bid<nbcol_loc){
if(Uinv_bc_offset[bid]==-1){
if(Uinv_bc_offset[bid]==-1 && Ucb_indoffset[bid]==-1){
return;
}

Expand Down
26 changes: 18 additions & 8 deletions SRC/pdsymbfact_distdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1900,14 +1900,24 @@ double *dense, *dense_col; /* SPA */
Lnzval_bc_offset[ljb_j]=len*nsupc;
Lnzval_bc_cnt += Lnzval_bc_offset[ljb_j];

if (!(Linv_bc_ptr[ljb_j] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Linv_bc_ptr[ljb_j][]");
Linv_bc_offset[ljb_j]=nsupc*nsupc;
Linv_bc_cnt += Linv_bc_offset[ljb_j];
if (!(Uinv_bc_ptr[ljb_j] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Uinv_bc_ptr[ljb_j][]");
Uinv_bc_offset[ljb_j]=nsupc*nsupc;
Uinv_bc_cnt += Uinv_bc_offset[ljb_j];
myrow = MYROW( iam, grid );
krow = PROW( jb, grid );
if(myrow==krow){ /* diagonal block */

if (!(Linv_bc_ptr[ljb_j] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Linv_bc_ptr[ljb_j][]");
Linv_bc_offset[ljb_j]=nsupc*nsupc;
Linv_bc_cnt += Linv_bc_offset[ljb_j];
if (!(Uinv_bc_ptr[ljb_j] = (double*)SUPERLU_MALLOC(nsupc*nsupc * sizeof(double))))
ABORT("Malloc fails for Uinv_bc_ptr[ljb_j][]");
Uinv_bc_offset[ljb_j]=nsupc*nsupc;
Uinv_bc_cnt += Uinv_bc_offset[ljb_j];
}else{
Linv_bc_ptr[ljb_j] = NULL;
Linv_bc_offset[ljb_j] = -1;
Uinv_bc_ptr[ljb_j] = NULL;
Uinv_bc_offset[ljb_j] = -1;
}

memNLU += len1*iword + len*nsupc*dword;

Expand Down

0 comments on commit 098e96f

Please sign in to comment.