Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ShyLU-Basker : move ND data-struc setup to symbolic #13749

Merged
merged 7 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions packages/amesos2/example/SimpleSolve_File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ int main(int argc, char *argv[]) {

Teuchos::oblackholestream blackhole;

bool multi_solve = false;
bool printMatrix = false;
bool printSolution = false;
bool checkSolution = false;
Expand All @@ -83,6 +84,7 @@ int main(int argc, char *argv[]) {
cmdp.setOption("rhs_filename",&rhs_filename,"Filename for Matrix-Market right-hand-side.");
cmdp.setOption("solvername",&solvername,"Name of solver.");
cmdp.setOption("xml_filename",&xml_filename,"XML Filename for Solver parameters.");
cmdp.setOption("multi-solve","no-multi-solve",&multi_solve,"Test multiple numFacto & solve per symbolic.");
cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it.");
cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve.");
cmdp.setOption("check-solution","no-check-solution",&checkSolution,"Check solution vector after solve.");
Expand All @@ -104,7 +106,7 @@ int main(int argc, char *argv[]) {
const size_t numVectors = 1;

// Read matrix
RCP<const MAT> A = Tpetra::MatrixMarket::Reader<MAT>::readSparseFile(mat_filename, comm);
RCP<MAT> A = Tpetra::MatrixMarket::Reader<MAT>::readSparseFile(mat_filename, comm);

// get the map (Range Map used for both X & B)
RCP<const Map<LO,GO> > rngmap = A->getRangeMap();
Expand Down Expand Up @@ -217,11 +219,34 @@ int main(int argc, char *argv[]) {
stackedTimer = rcp(new Teuchos::StackedTimer("Amesos2 SimpleSolve-File"));
Teuchos::TimeMonitor::setStackedTimer(stackedTimer);
}
solver->symbolicFactorization().numericFactorization().solve();
solver->symbolicFactorization(); comm->barrier();
solver->numericFactorization(); comm->barrier();
solver->solve(); comm->barrier();
if (multi_solve) {
{
// change (1,1) diagonal entry value
Teuchos::Array<GO> gblColIndsBuf (1);
Teuchos::Array<Scalar> valsBuf (1);
valsBuf[0] = 7.0;
gblColIndsBuf[0] = 0;

Teuchos::ArrayView<GO> gblColInds = gblColIndsBuf.view (0, 1);
Teuchos::ArrayView<Scalar> vals = valsBuf.view (0, 1);

A->resumeFill();
A->replaceGlobalValues (0, gblColInds, vals);
A->fillComplete();
}
// perform numeric for the second time
solver->numericFactorization();

// change RHS, and re-do solve
B->putScalar(10);
solver->solve();
}
if(useStackedTimer) {
stackedTimer->stopBaseTimer();
}

if( printSolution ){
// Print the solution
RCP<Map<LO,GO> > root_map
Expand Down
12 changes: 10 additions & 2 deletions packages/amesos2/test/solvers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -390,14 +390,22 @@ IF (${PACKAGE_NAME}_ENABLE_ShyLU_NodeBasker)
STANDARD_PASS_OUTPUT
)

##Copy shylubasker_test.xml, but do not test
##depends on not included matrices
##Copy shylubasker_test.xml
TRIBITS_COPY_FILES_TO_BINARY_DIR(SolverTestCopyShyLUBaskerFiles
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
SOURCE_FILES shylubasker_test.xml
EXEDEPS Solver_Test
)

##Test only if ShyLU-Basker is enabled
TRIBITS_ADD_TEST(
Solver_Test
NAME ShyLUBasker_Solver_Test
ARGS "--xml-params=shylubasker_test.xml --filedir=${CMAKE_CURRENT_BINARY_DIR}/../matrices/ --multiple-solves --refactor"
STANDARD_PASS_OUTPUT
NUM_MPI_PROCS 2
COMM serial mpi
)
ENDIF()

##### MUMPS Tests ####
Expand Down
28 changes: 14 additions & 14 deletions packages/amesos2/test/solvers/shylubasker_test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<!-- Wathen120 is a bad performance matrix that is symm with nice supernodes -->
<ParameterList name="wathen120.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -221,13 +221,13 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end wathen120 -->


<!-- Asic_680ks is a key matrix with medium performance first btf blk small -->
<ParameterList name="asic_680ks.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -442,12 +442,12 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end asic680ks -->

<!-- G2 Very bad matrix sym large supernodes -->
<ParameterList name="G2_circuit.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -662,12 +662,12 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end G2_circuit -->

<!-- Private Large Power Simulation Matrix, good very btf matrix -->
<ParameterList name="power0.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -882,12 +882,12 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end power0 -->

<!-- hvdc2, interesting matrix with need for pivoting -->
<ParameterList name="hvdc2.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -1102,12 +1102,12 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end hvdc2 -->

<!-- amesos2_test_mat1 has good tests (limit to 4 threads) -->
<ParameterList name="amesos2_test_mat1.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -1272,12 +1272,12 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end amesos2_test_mat1 -->

<!-- amesos2_test_mat4 has good tests (limit to 4 threads) -->
<ParameterList name="amesos2_test_mat4.mtx">
<ParameterList name="Basker">
<ParameterList name="ShyLUBasker">

<!-- Test Epetra objects first -->
<ParameterList name="epetra">
Expand Down Expand Up @@ -1442,7 +1442,7 @@
</ParameterList> <!-- end solver_run_params -->
</ParameterList> <!-- end run-int-long-p#-default -->
</ParameterList> <!-- end tpetra -->
</ParameterList> <!-- end Basker -->
</ParameterList> <!-- end ShyLUBasker -->
</ParameterList> <!-- end amesos2_test_mat4 -->
<!-- TO BE CONT... -->
</ParameterList> <!-- end test_parms -->
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ namespace BaskerNS
int sfactor_copy();

BASKER_INLINE
int sfactor_copy2(bool alloc_BTFA = false, bool copy_BTFA = true);
int sfactor_copy2(bool doSymbolic = true, bool alloc_BTFA = false, bool copy_BTFA = true);


//old
Expand Down
65 changes: 39 additions & 26 deletions packages/shylu/shylu_node/basker/src/shylubasker_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ namespace BaskerNS
Kokkos::Timer timer;
#endif

int err = 0;
if(Options.verbose == BASKER_TRUE)
{
std::cout << "\n == Basker Symbolic ==" << std::endl;
Expand Down Expand Up @@ -533,21 +534,6 @@ namespace BaskerNS
symb_flag = BASKER_TRUE;
}


if(Options.verbose == BASKER_TRUE)
{
printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout);
}

#ifdef BASKER_TIMER
time = timer.seconds();
stats.time_sfactor += time;
std::cout << "Basker Symbolic total time: " << time
<< std::endl << std::endl;
std::cout.precision(old_precision);
std::cout.flags(old_settings);
#endif

// NDE store matrix dims here for comparison in Factor
sym_gn = A.ncol;
sym_gm = A.nrow;
Expand Down Expand Up @@ -577,8 +563,30 @@ namespace BaskerNS
}
}
printf("];\n");*/
bool allocate_nd_workspace = (Options.blk_matching == 0 && Options.static_delayed_pivot == 0);
if (btf_tabs_offset != 0 && allocate_nd_workspace) {
// setup data-structure for ND
bool doSymbolic = true;
bool copy_BTFA = (Options.blk_matching == 0 || Options.static_delayed_pivot != 0);
bool alloc_BTFA = (Options.static_delayed_pivot != 0);
err = sfactor_copy2(doSymbolic, alloc_BTFA, copy_BTFA);
}

return 0;
#ifdef BASKER_TIMER
time = timer.seconds();
stats.time_sfactor += time;
std::cout << "Basker Symbolic total time: " << time
<< std::endl << std::endl;
std::cout.precision(old_precision);
std::cout.flags(old_settings);
#endif

if(Options.verbose == BASKER_TRUE)
{
printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout);
}

return err;
} //end Symbolic()


Expand Down Expand Up @@ -1934,6 +1942,7 @@ namespace BaskerNS
// sfactor_copy2 is now only responsible for the copy from BTF_A to 2D blocks
Kokkos::Timer timer_sfactorcopy;
double sfactorcopy_time = 0.0;
bool doSymbolic_ND = (Options.blk_matching != 0 || Options.static_delayed_pivot != 0);
if (btf_tabs_offset != 0) {
bool flag = true;
#ifdef BASKER_KOKKOS
Expand All @@ -1947,24 +1956,27 @@ namespace BaskerNS
}*/

Kokkos::Timer nd_setup2_timer;
#ifdef BASKER_PARALLEL_INIT_WORKSPACE
kokkos_sfactor_init_workspace<Int,Entry,Exe_Space>
iWS(flag, this);
Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS);
Kokkos::fence();
#else
for (Int p = 0; p < num_threads; p++) {
this->t_init_workspace(flag, p);
// If sfactor_copy2 has already been called in symbolic, all the blocks
// have been allocated, so they can be initialized in a parallel-for.
// If not, initialize them with a non-parallel for loop.
if (doSymbolic_ND) {
for (Int p = 0; p < num_threads; p++) {
this->t_init_workspace(flag, p);
}
} else {
kokkos_sfactor_init_workspace<Int,Entry,Exe_Space>
iWS(flag, this);
Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS);
Kokkos::fence();
}
#endif
if(Options.verbose == BASKER_TRUE) {
std::cout<< " > Basker Factor: Time for workspace allocation after ND on a big block A: " << nd_setup2_timer.seconds() << std::endl;
}
#endif
}
bool copy_BTFA = (Options.blk_matching == 0 || Options.static_delayed_pivot != 0);
bool alloc_BTFA = (Options.static_delayed_pivot != 0);
err = sfactor_copy2(alloc_BTFA, copy_BTFA);
err = sfactor_copy2(doSymbolic_ND, alloc_BTFA, copy_BTFA);

if(Options.verbose == BASKER_TRUE) {
sfactorcopy_time += timer_sfactorcopy.seconds();
Expand All @@ -1973,6 +1985,7 @@ namespace BaskerNS
}
if(err == BASKER_ERROR)
{ return BASKER_ERROR; }
//BTF_A.print_matrix("AA.dat");

Kokkos::Timer timer_factornotoken;
double fnotoken_time = 0.0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ namespace BaskerNS
Int mnnz; //malloc nnz

INT_1DARRAY col_ptr;
INT_1DARRAY dig_ptr;
INT_1DARRAY col_idx; // NOTE: auxiliary for find_2D_convert
INT_1DARRAY row_idx;
ENTRY_1DARRAY val;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ namespace BaskerNS
if(v_fill == BASKER_TRUE)
{
FREE_INT_1DARRAY(col_ptr);
FREE_INT_1DARRAY(dig_ptr);
FREE_INT_1DARRAY(row_idx);
FREE_ENTRY_1DARRAY(val);
v_fill = BASKER_FALSE;
Expand Down Expand Up @@ -190,10 +191,12 @@ namespace BaskerNS
//printf( " init_col(n=%d)\n",ncol );
BASKER_ASSERT(ncol >= 0, "INIT_COL, ncol > 0");
MALLOC_INT_1DARRAY(col_ptr, ncol+1);
MALLOC_INT_1DARRAY(dig_ptr, ncol+1);
MALLOC_INT_1DARRAY(col_idx, ncol+1);
for(Int i = 0; i < ncol+1; ++i)
{
col_ptr(i) = (Int) BASKER_MAX_IDX;
dig_ptr(i) = (Int) BASKER_MAX_IDX;
col_idx(i) = (Int) BASKER_MAX_IDX;
}
}//end init_col()
Expand All @@ -206,6 +209,7 @@ namespace BaskerNS
for(Int i = 0; i < ncol+1; ++i)
{
col_ptr(i) = (Int) BASKER_MAX_IDX;
dig_ptr(i) = (Int) BASKER_MAX_IDX;
col_idx(i) = (Int) BASKER_MAX_IDX;
}
nnz = 0;
Expand All @@ -228,6 +232,7 @@ namespace BaskerNS
{
BASKER_ASSERT((ncol+1)>0, "matrix init_vector ncol");
MALLOC_INT_1DARRAY(col_ptr,ncol+1);
MALLOC_INT_1DARRAY(dig_ptr,ncol+1);
}
if(nnz > 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ namespace BaskerNS


//----------------Sep level upper tri-------------
for(Int l = 1; l < (lvl) && info == BASKER_SUCCESS; ++l)
for(Int l = 1; l < lvl && info == BASKER_SUCCESS; ++l)
{
for(Int k = 0; k < ncol; ++k)
{
Expand Down Expand Up @@ -352,10 +352,10 @@ namespace BaskerNS

// ------------------------------------------------------- //
// > factor the k-th column of the off-diagonal blocks
#ifdef BASKER_TIMER
timer_facoff.reset();
#endif
if (info == BASKER_SUCCESS) {
#ifdef BASKER_TIMER
timer_facoff.reset();
#endif
#ifdef BASKER_DEBUG_NFACTOR_COL2
printf(" calling lower offdiag factor, kid: %d k: %d \n",
kid, k); fflush(stdout);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2)
}
return info_scotch;
} else if(Options.verbose == BASKER_TRUE) {
printf( "\n part_scotch done (num_threads = %d,%lu)\n",num_threads,part_tree.leaf_nnz.extent(0) );
printf( "\n part_scotch done (num_threads = %d,%lu)\n",int(num_threads),part_tree.leaf_nnz.extent(0) );
//for (Int i = 0; i < num_threads; i++) printf( " nnz_leaf[%d] = %d\n",i,part_tree.leaf_nnz[i] ); printf( "\n" );
}
nd_flag = BASKER_TRUE;
Expand Down
Loading
Loading