-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
173 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#include <cstdio>

#include "superlu_ddefs.h"
#include "lupanels.hpp"
#include "lupanels_GPU.cuh"
|
||
//TODO: needs to be merged as a single factorization function

/*
 * Abort the whole MPI job when a CUDA runtime call did not succeed.
 *
 * The factorization loop below is made of collective MPI_Bcast calls, so a
 * rank that merely returned early on a CUDA failure would leave its peers
 * blocked inside a broadcast. Aborting the job is the only safe reaction.
 *
 * status : return code of the CUDA runtime call being checked
 * what   : short description of the failed operation, used in the message
 */
static void abortOnCudaFailure(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return;

    std::fprintf(stderr, "CUDA error in dsparseTreeFactorGPU (%s): %s\n",
                 what, cudaGetErrorString(status));
    MPI_Abort(MPI_COMM_WORLD, (int)status);
}

/*
 * Factorize all supernodes of one elimination-tree forest using the GPU
 * panel routines.
 *
 * The nodes are visited level by level (eTreeTopLims delimits the levels in
 * sforest->nodeList). For every supernode k the following steps run in order:
 *   1. diagonal block factorization on the owner process procIJ(k, k);
 *   2. broadcast of the packed diagonal L/U factors along the process row
 *      and process column of k;
 *   3. triangular panel solves on the GPU (panelSolveGPU);
 *   4. broadcast of the device-resident panels (assumes GPU-direct MPI) and a
 *      copy of the panel index arrays back to the host;
 *   5. Schur-complement update on the GPU (dSchurComplementUpdateGPU).
 *
 * Only stream/handle 0 and receive buffer 0 are used by this function.
 *
 * Parameters
 *   sforest        forest to factorize (node list and topological levels)
 *   dFBufs         per-node diagonal factor buffers, indexed by the node's
 *                  offset inside its topological level; BlockLFactor and
 *                  BlockUFactor are host pointers
 *   thresh         forwarded to diagFactorPackDiagBlockGPU
 *   info           forwarded to diagFactorPackDiagBlockGPU
 *   comReqss, scuBufs, packLUInfo, msgss, LUvsbs, gEtreeInfo,
 *   gIperm_c_supno, tag_ub
 *                  not referenced by this function
 *
 * Returns 1 when the forest has no nodes, 0 otherwise. A failing CUDA runtime
 * call aborts the MPI job.
 */
int_t LUstruct_v100::dsparseTreeFactorGPU(
    sForest_t *sforest,
    commRequests_t **comReqss, // lists of communication requests // size maxEtree level
    dscuBufs_t *scuBufs,       // contains buffers for schur complement update
    packLUInfo_t *packLUInfo,
    msgs_t **msgss,           // size=num Look ahead
    dLUValSubBuf_t **LUvsbs,  // size=num Look ahead
    ddiagFactBufs_t **dFBufs, // size maxEtree level
    gEtreeInfo_t *gEtreeInfo, // global etree info
    int_t *gIperm_c_supno,
    double thresh, int tag_ub,
    int *info)
{
    int_t nnodes = sforest->nNodes; // number of nodes in the tree
    if (nnodes < 1)
    {
        return 1;
    }

#if (DEBUGlevel >= 1)
    CHECK_MALLOC(grid3d->iam, "Enter dsparseTreeFactorGPU()");
#endif

    int_t *perm_c_supno = sforest->nodeList; // list of nodes in the order of factorization
    treeTopoInfo_t *treeTopoInfo = &sforest->topoInfo;
    int_t maxTopoLevel = treeTopoInfo->numLvl;
    int_t *eTreeTopLims = treeTopoInfo->eTreeTopLims;

    // The look-ahead count is queried as in the original code but is not used
    // anywhere in this function.
    int_t numLA = getNumLookAhead(options);
    (void)numLA;

    /*main loop over all the levels*/
    for (int_t topoLvl = 0; topoLvl < maxTopoLevel; ++topoLvl)
    {
        // Nodes of this level occupy perm_c_supno[k_st .. k_end-1].
        int_t k_st = eTreeTopLims[topoLvl];
        int_t k_end = eTreeTopLims[topoLvl + 1];
        for (int_t k0 = k_st; k0 < k_end; ++k0)
        {
            int_t k = perm_c_supno[k0];
            int_t offset = k0 - k_st; // position of k inside its level; selects the dFBufs slot
            int_t ksupc = SuperSize(k);
            cublasHandle_t cubHandle = A_gpu.cuHandles[0];
            cudaStream_t cuStream = A_gpu.cuStreams[0];

            /*=======   Diagonal Factorization      ======*/
            if (iam == procIJ(k, k))
            {
                lPanelVec[g2lCol(k)].diagFactorPackDiagBlockGPU(k,
                    dFBufs[offset]->BlockUFactor, ksupc, // CPU pointers
                    dFBufs[offset]->BlockLFactor, ksupc, // CPU pointers
                    thresh, xsup, options, stat, info);
            }

            /*=======   Diagonal Broadcast          ======*/
            if (myrow == krow(k))
                MPI_Bcast((void *)dFBufs[offset]->BlockLFactor, ksupc * ksupc,
                          MPI_DOUBLE, kcol(k), (grid->rscp).comm);
            if (mycol == kcol(k))
                MPI_Bcast((void *)dFBufs[offset]->BlockUFactor, ksupc * ksupc,
                          MPI_DOUBLE, krow(k), (grid->cscp).comm);

            /*=======   Panel Update                ======*/
            if (myrow == krow(k))
                uPanelVec[g2lRow(k)].panelSolveGPU(
                    cubHandle, cuStream,
                    ksupc, dFBufs[offset]->BlockLFactor, ksupc);

            if (mycol == kcol(k))
                lPanelVec[g2lCol(k)].panelSolveGPU(
                    cubHandle, cuStream,
                    ksupc, dFBufs[offset]->BlockUFactor, ksupc);

            // The panel solves were handed cuStream and may still be in flight.
            // MPI is not ordered with CUDA streams, so wait for the stream
            // before the device buffers are used as broadcast send buffers.
            if (myrow == krow(k) || mycol == kcol(k))
                abortOnCudaFailure(cudaStreamSynchronize(cuStream),
                                   "synchronize after panel solve");

            /*=======   Panel Broadcast             ======*/
            // Start from the receive buffers; ranks that own the panel switch
            // to their local copy, which then acts as the broadcast source.
            upanel_t k_upanel(UidxRecvBufs[0], UvalRecvBufs[0],
                              A_gpu.UidxRecvBufs[0], A_gpu.UvalRecvBufs[0]);
            lpanel_t k_lpanel(LidxRecvBufs[0], LvalRecvBufs[0],
                              A_gpu.LidxRecvBufs[0], A_gpu.LvalRecvBufs[0]);
            if (myrow == krow(k))
            {
                k_upanel = uPanelVec[g2lRow(k)];
            }
            if (mycol == kcol(k))
                k_lpanel = lPanelVec[g2lCol(k)];

            if (UidxSendCounts[k] > 0)
            {
                // assuming GPU direct is available
                MPI_Bcast(k_upanel.gpuPanel.index, UidxSendCounts[k], mpi_int_t, krow(k), grid3d->cscp.comm);
                MPI_Bcast(k_upanel.gpuPanel.val, UvalSendCounts[k], MPI_DOUBLE, krow(k), grid3d->cscp.comm);
                // copy the index to cpu
                abortOnCudaFailure(
                    cudaMemcpy(k_upanel.index, k_upanel.gpuPanel.index,
                               sizeof(int_t) * UidxSendCounts[k], cudaMemcpyDeviceToHost),
                    "copy U panel index to host");
            }

            if (LidxSendCounts[k] > 0)
            {
                // assuming GPU direct is available
                MPI_Bcast(k_lpanel.gpuPanel.index, LidxSendCounts[k], mpi_int_t, kcol(k), grid3d->rscp.comm);
                MPI_Bcast(k_lpanel.gpuPanel.val, LvalSendCounts[k], MPI_DOUBLE, kcol(k), grid3d->rscp.comm);
                // copy the index to cpu
                abortOnCudaFailure(
                    cudaMemcpy(k_lpanel.index, k_lpanel.gpuPanel.index,
                               sizeof(int_t) * LidxSendCounts[k], cudaMemcpyDeviceToHost),
                    "copy L panel index to host");
            }

            /*=======   Schurcomplement Update      ======*/
#warning single node only
            // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], uPanelVec[g2lRow(k)]);
            // dSchurComplementUpdate(k, lPanelVec[g2lCol(k)], k_upanel);
            if (UidxSendCounts[k] > 0 && LidxSendCounts[k] > 0)
            {
                // k_upanel.checkCorrectness();
                // NOTE(review): receive buffer 0 is reused by the next k; this
                // relies on dSchurComplementUpdateGPU having finished with the
                // panels before it returns -- confirm against its definition.
                int streamId = 0;
                dSchurComplementUpdateGPU(
                    streamId,
                    k, k_lpanel, k_upanel);
            }
            // MPI_Barrier(grid3d->comm);

        } /*for k0= k_st:k_end */

    } /*for topoLvl = 0:maxTopoLevel*/

#if (DEBUGlevel >= 1)
    CHECK_MALLOC(grid3d->iam, "Exit dsparseTreeFactorGPU()");
#endif

    return 0;
} /* dsparseTreeFactorGPU */
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters