diff --git a/DOC/html/EXAMPLE_2dcreate__matrix_8c.html b/DOC/html/EXAMPLE_2dcreate__matrix_8c.html deleted file mode 100644 index 7be1154c..00000000 --- a/DOC/html/EXAMPLE_2dcreate__matrix_8c.html +++ /dev/null @@ -1,274 +0,0 @@ - - -
- - - - -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file. -More...
--Functions | |
int | dcreate_matrix_postfix (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo_t *grid) |
int | dcreate_matrix (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo_t *grid) |
Read the matrix from data file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -
int dcreate_matrix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | double ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | double ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
int dcreate_matrix_postfix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | double ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | double ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | char * | -postfix, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format. -More...
--Functions | |
static void | ReadVector (FILE *, int_t, int_t *, int_t, int_t) |
static void | dReadValues (FILE *, int_t, double *, int_t, int_t) |
void | FormFullA (int_t, int_t *, double **, int_t **, int_t **) |
static int | DumpLine (FILE *) |
static int | ParseIntFormat (char *, int_t *, int_t *) |
static int | ParseFloatFormat (char *, int_t *, int_t *) |
void | dreadhb_dist (int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
void dreadhb_dist | -( | -int | -iam, | -
- | - | FILE * | -fp, | -
- | - | int_t * | -nrow, | -
- | - | int_t * | -ncol, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
-Purpose -======= - -Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format -as described below. - -Line 1 (A72,A8) - Col. 1 - 72 Title (TITLE) - Col. 73 - 80 Key (KEY) - -Line 2 (5I14) - Col. 1 - 14 Total number of lines excluding header (TOTCRD) - Col. 15 - 28 Number of lines for pointers (PTRCRD) - Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - Col. 43 - 56 Number of lines for numerical values (VALCRD) - Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - (including starting guesses and solution vectors - if present) - (zero indicates no right-hand side data is present) - -Line 3 (A3, 11X, 4I14) - Col. 1 - 3 Matrix type (see below) (MXTYPE) - Col. 15 - 28 Number of rows (or variables) (NROW) - Col. 29 - 42 Number of columns (or elements) (NCOL) - Col. 43 - 56 Number of row (or variable) indices (NNZERO) - (equal to number of entries for assembled matrices) - Col. 57 - 70 Number of elemental matrix entries (NELTVL) - (zero in the case of assembled matrices) -Line 4 (2A16, 2A20) - Col. 1 - 16 Format for pointers (PTRFMT) - Col. 17 - 32 Format for row (or variable) indices (INDFMT) - Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - -Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - Col. 1 Right-hand side type: - F for full storage or M for same format as matrix - Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - Col. 3 X if an exact solution vector(s) is supplied. - Col. 15 - 28 Number of right-hand sides (NRHS) - Col. 29 - 42 Number of row indices (NRHSIX) - (ignored in case of unassembled matrices) - -The three character type field on line 3 describes the matrix type. -The following table lists the permitted values for each of the three -characters. As an example of the type field, RSA denotes that the matrix -is real, symmetric, and assembled. 
- -First Character: - R Real matrix - C Complex matrix - P Pattern only (no numerical values supplied) - -Second Character: - S Symmetric - U Unsymmetric - H Hermitian - Z Skew symmetric - R Rectangular - -Third Character: - A Assembled - E Elemental matrices (unassembled) -
-
|
- -static | -
-
|
- -static | -
void FormFullA | -( | -int_t | -n, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
-On input, nonz/nzval/rowind/colptr represents lower part of a symmetric -matrix. On exit, it represents the full matrix with lower and upper parts. -
-
|
- -static | -
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | dreadtriple (FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
void | dreadrhs (int m, double *b) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void dreadrhs | -( | -int | -m, | -
- | - | double * | -b | -
- | ) | -- |
void dreadtriple | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
brief
--Output parameters -================= - (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of - nonzeros in columns of matrix A; (*nzval)[*] the numerical values; - column i of A is given by (*nzval)[k], k = (*rowind)[i],..., - (*rowind)[i+1]-1. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | pzgsmv_init (SuperMatrix *A, int_t *row_to_proc, gridinfo_t *grid, pzgsmv_comm_t *gsmv_comm) |
void | pzgsmv (int_t abs, SuperMatrix *A_internal, gridinfo_t *grid, pzgsmv_comm_t *gsmv_comm, doublecomplex x[], doublecomplex ax[]) |
void | pzgsmv_finalize (pzgsmv_comm_t *gsmv_comm) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -
void pzgsmv | -( | -int_t | -abs, | -
- | - | SuperMatrix * | -A_internal, | -
- | - | gridinfo_t * | -grid, | -
- | - | pzgsmv_comm_t * | -gsmv_comm, | -
- | - | doublecomplex | -x[], | -
- | - | doublecomplex | -ax[] | -
- | ) | -- |
void pzgsmv_finalize | -( | -pzgsmv_comm_t * | -gsmv_comm | ) | -- |
void pzgsmv_init | -( | -SuperMatrix * | -A, | -
- | - | int_t * | -row_to_proc, | -
- | - | gridinfo_t * | -grid, | -
- | - | pzgsmv_comm_t * | -gsmv_comm | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file. -More...
--Functions | |
int | zcreate_matrix_postfix (SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, char *postfix, gridinfo_t *grid) |
int | zcreate_matrix (SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, gridinfo_t *grid) |
Read the matrix from data file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -
int zcreate_matrix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | doublecomplex ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | doublecomplex ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
int zcreate_matrix_postfix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | doublecomplex ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | doublecomplex ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | char * | -postfix, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format. -More...
--Functions | |
static void | ReadVector (FILE *, int_t, int_t *, int_t, int_t) |
static void | zReadValues (FILE *, int_t, doublecomplex *, int_t, int_t) |
static int | DumpLine (FILE *) |
static int | ParseIntFormat (char *, int_t *, int_t *) |
static int | ParseFloatFormat (char *, int_t *, int_t *) |
void | zreadhb_dist (int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) |
Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
void zreadhb_dist | -( | -int | -iam, | -
- | - | FILE * | -fp, | -
- | - | int_t * | -nrow, | -
- | - | int_t * | -ncol, | -
- | - | int_t * | -nonz, | -
- | - | doublecomplex ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
-Purpose -======= - -Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format -as described below. - -Line 1 (A72,A8) - Col. 1 - 72 Title (TITLE) - Col. 73 - 80 Key (KEY) - -Line 2 (5I14) - Col. 1 - 14 Total number of lines excluding header (TOTCRD) - Col. 15 - 28 Number of lines for pointers (PTRCRD) - Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - Col. 43 - 56 Number of lines for numerical values (VALCRD) - Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - (including starting guesses and solution vectors - if present) - (zero indicates no right-hand side data is present) - -Line 3 (A3, 11X, 4I14) - Col. 1 - 3 Matrix type (see below) (MXTYPE) - Col. 15 - 28 Number of rows (or variables) (NROW) - Col. 29 - 42 Number of columns (or elements) (NCOL) - Col. 43 - 56 Number of row (or variable) indices (NNZERO) - (equal to number of entries for assembled matrices) - Col. 57 - 70 Number of elemental matrix entries (NELTVL) - (zero in the case of assembled matrices) -Line 4 (2A16, 2A20) - Col. 1 - 16 Format for pointers (PTRFMT) - Col. 17 - 32 Format for row (or variable) indices (INDFMT) - Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - -Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - Col. 1 Right-hand side type: - F for full storage or M for same format as matrix - Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - Col. 3 X if an exact solution vector(s) is supplied. - Col. 15 - 28 Number of right-hand sides (NRHS) - Col. 29 - 42 Number of row indices (NRHSIX) - (ignored in case of unassembled matrices) - -The three character type field on line 3 describes the matrix type. -The following table lists the permitted values for each of the three -characters. As an example of the type field, RSA denotes that the matrix -is real, symmetric, and assembled. 
- -First Character: - R Real matrix - C Complex matrix - P Pattern only (no numerical values supplied) - -Second Character: - S Symmetric - U Unsymmetric - H Hermitian - Z Skew symmetric - R Rectangular - -Third Character: - A Assembled - E Elemental matrices (unassembled) -
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | zreadtriple (FILE *fp, int_t *m, int_t *n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) |
void | zreadrhs (int m, doublecomplex *b) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void zreadrhs | -( | -int | -m, | -
- | - | doublecomplex * | -b | -
- | ) | -- |
void zreadtriple | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nonz, | -
- | - | doublecomplex ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
brief
--Output parameters -================= - (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of - nonzeros in columns of matrix A; (*nzval)[*] the numerical values; - column i of A is given by (*nzval)[k], k = (*rowind)[i],..., - (*rowind)[i+1]-1. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Chooses machine-dependent parameters for the local environment. -More...
--Functions | |
int | sp_ienv_dist (int ispec, superlu_dist_options_t *options) |
Chooses machine-dependent parameters for the local environment.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-int sp_ienv_dist | -( | -int | -ispec, | -
- | - | superlu_dist_options_t * | -options | -
- | ) | -- |
sp_ienv_dist() is inquired to choose machine-dependent integer parameters for the local environment. See ISPEC for a description of the parameters.
-
This version provides a set of parameters which should give good,
- but not optimal, performance on many of the currently available
- computers. Users are encouraged to set the environment variable to change the tuning parameters for their particular machines.
ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST.
- = 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on the machine's cache characteristics. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to the relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fill factor for the adjacency structures of L and U, compared with A; = 7: the minimum value of the product M*N*K for a GEMM call worth being offloaded to an accelerator (e.g., GPU, Xeon Phi). = 8: the maximum buffer size on GPU that can hold the "dC" matrix in the GEMM call for the Schur complement update. If this is too small, the Schur complement update will be done in multiple partitions, which may be slower. = 9: number of GPU streams = 10: whether to offload work to GPU or not</p>
options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition the solves are performed.
-(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC
- < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
LUgpu_Handle | createLUgpuHandle (int_t nsupers, int_t ldt_, dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SCT_t *SCT_, superlu_dist_options_t *options_, SuperLUStat_t *stat, double thresh_, int *info_) |
void | destroyLUgpuHandle (LUgpu_Handle LuH) |
int | dgatherFactoredLU3Dto2D (LUgpu_Handle LuH) |
int | copyLUGPU2Host (LUgpu_Handle LuH, dLUstruct_t *LUstruct) |
int | pdgstrf3d_LUpackedInterface (LUgpu_Handle LUHand) |
int copyLUGPU2Host | -( | -LUgpu_Handle | -LuH, | -
- | - | dLUstruct_t * | -LUstruct | -
- | ) | -- |
LUgpu_Handle createLUgpuHandle | -( | -int_t | -nsupers, | -
- | - | int_t | -ldt_, | -
- | - | dtrf3Dpartition_t * | -trf3Dpartition, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d, | -
- | - | SCT_t * | -SCT_, | -
- | - | superlu_dist_options_t * | -options_, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | double | -thresh_, | -
- | - | int * | -info_ | -
- | ) | -- |
void destroyLUgpuHandle | -( | -LUgpu_Handle | -LuH | ) | -- |
int dgatherFactoredLU3Dto2D | -( | -LUgpu_Handle | -LuH | ) | -- |
int pdgstrf3d_LUpackedInterface | -( | -LUgpu_Handle | -LUHand | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format. -More...
--Functions | |
static void | ReadVector (FILE *, int_t, int_t *, int_t, int_t) |
static void | dReadValues (FILE *, int_t, double *, int_t, int_t) |
static void | FormFullA (int_t, int_t *, double **, int_t **, int_t **) |
static int | DumpLine (FILE *) |
static int | ParseIntFormat (char *, int_t *, int_t *) |
static int | ParseFloatFormat (char *, int_t *, int_t *) |
void | dreadhb_dist (int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
void dreadhb_dist | -( | -int | -iam, | -
- | - | FILE * | -fp, | -
- | - | int_t * | -nrow, | -
- | - | int_t * | -ncol, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
-Purpose -======= - -Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format -as described below. - -Line 1 (A72,A8) - Col. 1 - 72 Title (TITLE) - Col. 73 - 80 Key (KEY) - -Line 2 (5I14) - Col. 1 - 14 Total number of lines excluding header (TOTCRD) - Col. 15 - 28 Number of lines for pointers (PTRCRD) - Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - Col. 43 - 56 Number of lines for numerical values (VALCRD) - Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - (including starting guesses and solution vectors - if present) - (zero indicates no right-hand side data is present) - -Line 3 (A3, 11X, 4I14) - Col. 1 - 3 Matrix type (see below) (MXTYPE) - Col. 15 - 28 Number of rows (or variables) (NROW) - Col. 29 - 42 Number of columns (or elements) (NCOL) - Col. 43 - 56 Number of row (or variable) indices (NNZERO) - (equal to number of entries for assembled matrices) - Col. 57 - 70 Number of elemental matrix entries (NELTVL) - (zero in the case of assembled matrices) -Line 4 (2A16, 2A20) - Col. 1 - 16 Format for pointers (PTRFMT) - Col. 17 - 32 Format for row (or variable) indices (INDFMT) - Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - -Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - Col. 1 Right-hand side type: - F for full storage or M for same format as matrix - Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - Col. 3 X if an exact solution vector(s) is supplied. - Col. 15 - 28 Number of right-hand sides (NRHS) - Col. 29 - 42 Number of row indices (NRHSIX) - (ignored in case of unassembled matrices) - -The three character type field on line 3 describes the matrix type. -The following table lists the permitted values for each of the three -characters. As an example of the type field, RSA denotes that the matrix -is real, symmetric, and assembled. 
- -First Character: - R Real matrix - C Complex matrix - P Pattern only (no numerical values supplied) - -Second Character: - S Symmetric - U Unsymmetric - H Hermitian - Z Skew symmetric - R Rectangular - -Third Character: - A Assembled - E Elemental matrices (unassembled) -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-On input, nonz/nzval/rowind/colptr represents lower part of a symmetric -matrix. On exit, it represents the full matrix with lower and upper parts. -
-
|
- -static | -
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | dreadtriple_dist (FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
void | dreadrhs (int m, double *b) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void dreadrhs | -( | -int | -m, | -
- | - | double * | -b | -
- | ) | -- |
void dreadtriple_dist | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
brief
--Output parameters -================= - (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of - nonzeros in columns of matrix A; (*nzval)[*] the numerical values; - column i of A is given by (*nzval)[k], k = (*rowind)[i],..., - (*rowind)[i+1]-1. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Parallel sparse matrix-vector multiplication. -More...
--Functions | |
void | pzgsmv_init (SuperMatrix *A, int_t *row_to_proc, gridinfo_t *grid, pzgsmv_comm_t *gsmv_comm) |
void | pzgsmv (int_t abs, SuperMatrix *A_internal, gridinfo_t *grid, pzgsmv_comm_t *gsmv_comm, doublecomplex x[], doublecomplex ax[]) |
void | pzgsmv_finalize (pzgsmv_comm_t *gsmv_comm) |
Parallel sparse matrix-vector multiplication.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -
void pzgsmv | -( | -int_t | -abs, | -
- | - | SuperMatrix * | -A_internal, | -
- | - | gridinfo_t * | -grid, | -
- | - | pzgsmv_comm_t * | -gsmv_comm, | -
- | - | doublecomplex | -x[], | -
- | - | doublecomplex | -ax[] | -
- | ) | -- |
void pzgsmv_finalize | -( | -pzgsmv_comm_t * | -gsmv_comm | ) | -- |
void pzgsmv_init | -( | -SuperMatrix * | -A, | -
- | - | int_t * | -row_to_proc, | -
- | - | gridinfo_t * | -grid, | -
- | - | pzgsmv_comm_t * | -gsmv_comm | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Chooses machine-dependent parameters for the local environment. -More...
--Functions | |
int | sp_ienv_dist (int ispec, superlu_dist_options_t *options) |
Chooses machine-dependent parameters for the local environment.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-int sp_ienv_dist | -( | -int | -ispec, | -
- | - | superlu_dist_options_t * | -options | -
- | ) | -- |
sp_ienv_dist() is inquired to choose machine-dependent integer parameters for the local environment. See ISPEC for a description of the parameters.
-
This version provides a set of parameters which should give good,
- but not optimal, performance on many of the currently available
- computers. Users are encouraged to set the environment variable to change the tuning parameters for their particular machines.
ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST.
- = 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on the machine's cache characteristics. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to the relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fill factor for the adjacency structures of L and U, compared with A; = 7: the minimum value of the product M*N*K for a GEMM call worth being offloaded to an accelerator (e.g., GPU, Xeon Phi). = 8: the maximum buffer size on GPU that can hold the "dC" matrix in the GEMM call for the Schur complement update. If this is too small, the Schur complement update will be done in multiple partitions, which may be slower. = 9: number of GPU streams = 10: whether to offload work to GPU or not</p>
options (input) superlu_dist_options_t* The structure defines the input parameters to control how the LU decomposition the solves are performed.
-(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC
- < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <assert.h>
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format. -More...
--Functions | |
static void | ReadVector (FILE *, int_t, int_t *, int_t, int_t) |
static void | zReadValues (FILE *, int_t, doublecomplex *, int_t, int_t) |
static int | DumpLine (FILE *) |
static int | ParseIntFormat (char *, int_t *, int_t *) |
static int | ParseFloatFormat (char *, int_t *, int_t *) |
void | zreadhb_dist (int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) |
Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
void zreadhb_dist | -( | -int | -iam, | -
- | - | FILE * | -fp, | -
- | - | int_t * | -nrow, | -
- | - | int_t * | -ncol, | -
- | - | int_t * | -nonz, | -
- | - | doublecomplex ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
-Purpose -======= - -Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format -as described below. - -Line 1 (A72,A8) - Col. 1 - 72 Title (TITLE) - Col. 73 - 80 Key (KEY) - -Line 2 (5I14) - Col. 1 - 14 Total number of lines excluding header (TOTCRD) - Col. 15 - 28 Number of lines for pointers (PTRCRD) - Col. 29 - 42 Number of lines for row (or variable) indices (INDCRD) - Col. 43 - 56 Number of lines for numerical values (VALCRD) - Col. 57 - 70 Number of lines for right-hand sides (RHSCRD) - (including starting guesses and solution vectors - if present) - (zero indicates no right-hand side data is present) - -Line 3 (A3, 11X, 4I14) - Col. 1 - 3 Matrix type (see below) (MXTYPE) - Col. 15 - 28 Number of rows (or variables) (NROW) - Col. 29 - 42 Number of columns (or elements) (NCOL) - Col. 43 - 56 Number of row (or variable) indices (NNZERO) - (equal to number of entries for assembled matrices) - Col. 57 - 70 Number of elemental matrix entries (NELTVL) - (zero in the case of assembled matrices) -Line 4 (2A16, 2A20) - Col. 1 - 16 Format for pointers (PTRFMT) - Col. 17 - 32 Format for row (or variable) indices (INDFMT) - Col. 33 - 52 Format for numerical values of coefficient matrix (VALFMT) - Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) - -Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present - Col. 1 Right-hand side type: - F for full storage or M for same format as matrix - Col. 2 G if a starting vector(s) (Guess) is supplied. (RHSTYP) - Col. 3 X if an exact solution vector(s) is supplied. - Col. 15 - 28 Number of right-hand sides (NRHS) - Col. 29 - 42 Number of row indices (NRHSIX) - (ignored in case of unassembled matrices) - -The three character type field on line 3 describes the matrix type. -The following table lists the permitted values for each of the three -characters. As an example of the type field, RSA denotes that the matrix -is real, symmetric, and assembled. 
- -First Character: - R Real matrix - C Complex matrix - P Pattern only (no numerical values supplied) - -Second Character: - S Symmetric - U Unsymmetric - H Hermitian - Z Skew symmetric - R Rectangular - -Third Character: - A Assembled - E Elemental matrices (unassembled) --
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | zreadtriple_dist (FILE *fp, int_t *m, int_t *n, int_t *nonz, doublecomplex **nzval, int_t **rowind, int_t **colptr) |
void | zreadrhs (int m, doublecomplex *b) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void zreadrhs | -( | -int | -m, | -
- | - | doublecomplex * | -b | -
- | ) | -- |
void zreadtriple_dist | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nonz, | -
- | - | doublecomplex ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
brief
--Output parameters -================= - (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of - nonzeros in columns of matrix A; (*nzval)[*] the numerical values; - column i of A is given by (*nzval)[k], k = (*rowind)[i],..., - (*rowind)[i+1]-1. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file. -More...
--Functions | |
int | dcreate_matrix (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo_t *grid) |
Read the matrix from data file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -
int dcreate_matrix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | double ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | double ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file. -More...
--Functions | |
int | zcreate_matrix (SuperMatrix *A, int nrhs, doublecomplex **rhs, int *ldb, doublecomplex **x, int *ldx, FILE *fp, gridinfo_t *grid) |
Read the matrix from data file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -
int zcreate_matrix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | doublecomplex ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | doublecomplex ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Namespaces | |
SuperLU_ASYNCOMM | |
-Functions | |
BcTree | SuperLU_ASYNCOMM::BcTree_Create (MPI_Comm comm, Int *ranks, Int rank_cnt, Int msgSize, double rseed, char precision) |
void | SuperLU_ASYNCOMM::BcTree_Destroy (BcTree Tree, char precision) |
void | SuperLU_ASYNCOMM::BcTree_SetTag (BcTree Tree, Int tag, char precision) |
yes_no_t | SuperLU_ASYNCOMM::BcTree_IsRoot (BcTree Tree, char precision) |
void | SuperLU_ASYNCOMM::BcTree_forwardMessageSimple (BcTree Tree, void *localBuffer, Int msgSize, char precision) |
void | SuperLU_ASYNCOMM::BcTree_waitSendRequest (BcTree Tree, char precision) |
void | SuperLU_ASYNCOMM::BcTree_allocateRequest (BcTree Tree, char precision) |
int | SuperLU_ASYNCOMM::BcTree_getDestCount (BcTree Tree, char precision) |
int | SuperLU_ASYNCOMM::BcTree_GetMsgSize (BcTree Tree, char precision) |
StdList | SuperLU_ASYNCOMM::StdList_Init () |
void | SuperLU_ASYNCOMM::StdList_Pushback (StdList lst, int_t dat) |
void | SuperLU_ASYNCOMM::StdList_Pushfront (StdList lst, int_t dat) |
int_t | SuperLU_ASYNCOMM::StdList_Popfront (StdList lst) |
yes_no_t | SuperLU_ASYNCOMM::StdList_Find (StdList lst, int_t dat) |
int_t | SuperLU_ASYNCOMM::StdList_Size (StdList lst) |
yes_no_t | SuperLU_ASYNCOMM::StdList_Empty (StdList lst) |
RdTree | SuperLU_ASYNCOMM::RdTree_Create (MPI_Comm comm, Int *ranks, Int rank_cnt, Int msgSize, double rseed, char precision) |
void | SuperLU_ASYNCOMM::RdTree_Destroy (RdTree Tree, char precision) |
void | SuperLU_ASYNCOMM::RdTree_SetTag (RdTree Tree, Int tag, char precision) |
int | SuperLU_ASYNCOMM::RdTree_GetDestCount (RdTree Tree, char precision) |
int | SuperLU_ASYNCOMM::RdTree_GetMsgSize (RdTree Tree, char precision) |
yes_no_t | SuperLU_ASYNCOMM::RdTree_IsRoot (RdTree Tree, char precision) |
void | SuperLU_ASYNCOMM::RdTree_forwardMessageSimple (RdTree Tree, void *localBuffer, Int msgSize, char precision) |
void | SuperLU_ASYNCOMM::RdTree_allocateRequest (RdTree Tree, char precision) |
void | SuperLU_ASYNCOMM::RdTree_waitSendRequest (RdTree Tree, char precision) |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Macros | |
#define | CLAMP(x, low, high) (((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x))) |
#define | MAX_DIM 12800 |
#define | MAX_IN_DIM 256 |
#define | LOG_2_MAX_IN_DIM 8 |
#define | LOG_2_MAX_DIM 7 |
-Functions | |
static double | load_imb (double *A, int nthreads) |
double | get_acc_async_cost () |
static unsigned int | next_power_2 (unsigned int v) |
static unsigned int | previous_power_2 (unsigned int v) |
static uint32_t | my_log2 (const uint32_t x) |
static unsigned int | nearst_2_100 (unsigned int v) |
static unsigned int | nearst_k (unsigned int v) |
double | estimate_acc_time (int m, int n, int k) |
double | estimate_acc_gemm_time (int m, int n, int k) |
double | estimate_acc_scatter_time (int m, int n, int k) |
double | estimate_cpu_time (int m, int n, int k) |
double | acc_data_send_time (size_t sz) |
void | LookUpTableInit (int my_rank) |
double | estimate_acc_scatter_time_strat1 (Ublock_info_t *Ublock_info, int_t nub, Remain_info_t *Lblock_info, int_t nlb) |
int_t | fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi) |
int_t | tuned_partition (int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi) |
-Variables | |
double | cpu_bandwidth |
int | communication_overlap |
double | acc_async_cost |
int_t | fixed_partition |
double | frac |
double | CpuDgemmLookUp [8][8][9] |
double | PhiDgemmLookUp [8][8][9] |
double | PhiBWLookUp [8] |
double | MicPciBandwidth [18] |
double | MicScatterBW [24][24] |
double | l_count [24] |
double | u_count [24] |
#define CLAMP | -( | -- | x, | -
- | - | - | low, | -
- | - | - | high | -
- | ) | -(((x) > (high)) ? (high) : (((x) < (low)) ? (low) : (x))) | -
#define LOG_2_MAX_DIM 7 | -
#define LOG_2_MAX_IN_DIM 8 | -
#define MAX_DIM 12800 | -
#define MAX_IN_DIM 256 | -
double acc_data_send_time | -( | -size_t | -sz | ) | -- |
double estimate_acc_gemm_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
double estimate_acc_scatter_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
double estimate_acc_scatter_time_strat1 | -( | -Ublock_info_t * | -Ublock_info, | -
- | - | int_t | -nub, | -
- | - | Remain_info_t * | -Lblock_info, | -
- | - | int_t | -nlb | -
- | ) | -- |
double estimate_acc_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
double estimate_cpu_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
int_t fixed_cpu_acc_partition | -( | -Ublock_info_t * | -Ublock_info_Phi, | -
- | - | int_t | -num_u_blks_Phi, | -
- | - | int_t | -Rnbrow, | -
- | - | int_t | -ldu_Phi | -
- | ) | -- |
double get_acc_async_cost | -( | -) | -- |
-
|
- -inlinestatic | -
void LookUpTableInit | -( | -int | -my_rank | ) | -- |
-
|
- -inlinestatic | -
-
|
- -inlinestatic | -
-
|
- -inlinestatic | -
-
|
- -inlinestatic | -
-
|
- -inlinestatic | -
int_t tuned_partition | -( | -int_t | -num_u_blks_Phi, | -
- | - | Ublock_info_t * | -Ublock_info_Phi, | -
- | - | Remain_info_t * | -Remain_info, | -
- | - | int_t | -RemainBlk, | -
- | - | double | -cpu_time_0, | -
- | - | int_t | -Rnbrow, | -
- | - | int_t | -ldu_Phi | -
- | ) | -- |
double acc_async_cost | -
int communication_overlap | -
double cpu_bandwidth | -
double CpuDgemmLookUp[8][8][9] | -
int_t fixed_partition | -
double frac | -
double l_count[24] | -
double MicPciBandwidth[18] | -
double MicScatterBW[24][24] | -
double PhiBWLookUp[8] | -
double PhiDgemmLookUp[8][8][9] | -
double u_count[24] | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Go to the source code of this file.
--Classes | |
struct | mdwin_t |
-Typedefs | |
typedef struct mdwin_t | mdwin_t |
-Functions | |
int_t | get_max_buffer_size () |
double | get_acc_async_cost () |
double | estimate_acc_time (int m, int n, int k) |
double | estimate_acc_gemm_time (int m, int n, int k) |
double | estimate_acc_scatter_time (int m, int n, int k) |
double | estimate_cpu_time (int m, int n, int k) |
double | acc_data_send_time (size_t sz) |
void | LookUpTableInit (int my_rank) |
int_t | fixed_cpu_acc_partition (Ublock_info_t *Ublock_info_Phi, int_t num_u_blks_Phi, int_t Rnbrow, int_t ldu_Phi) |
int_t | tuned_partition (int_t num_u_blks_Phi, Ublock_info_t *Ublock_info_Phi, Remain_info_t *Remain_info, int_t RemainBlk, double cpu_time_0, int_t Rnbrow, int_t ldu_Phi) |
double acc_data_send_time | -( | -size_t | -sz | ) | -- |
double estimate_acc_gemm_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
double estimate_acc_scatter_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
double estimate_acc_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
double estimate_cpu_time | -( | -int | -m, | -
- | - | int | -n, | -
- | - | int | -k | -
- | ) | -- |
int_t fixed_cpu_acc_partition | -( | -Ublock_info_t * | -Ublock_info_Phi, | -
- | - | int_t | -num_u_blks_Phi, | -
- | - | int_t | -Rnbrow, | -
- | - | int_t | -ldu_Phi | -
- | ) | -- |
double get_acc_async_cost | -( | -) | -- |
int_t get_max_buffer_size | -( | -) | -- |
void LookUpTableInit | -( | -int | -my_rank | ) | -- |
int_t tuned_partition | -( | -int_t | -num_u_blks_Phi, | -
- | - | Ublock_info_t * | -Ublock_info_Phi, | -
- | - | Remain_info_t * | -Remain_info, | -
- | - | int_t | -RemainBlk, | -
- | - | double | -cpu_time_0, | -
- | - | int_t | -Rnbrow, | -
- | - | int_t | -ldu_Phi | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <vector>
#include <iostream>
#include "superlu_ddefs.h"
#include "lu_common.hpp"
#include "lupanels_GPU.cuh"
#include "commWrapper.hpp"
Go to the source code of this file.
--Classes | |
struct | anc25d_t |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file, then distribute it in a distributed CSR format. -More...
--Functions | |
int | c2f_dcreate_matrix_x_b (char *fname, int nrhs, int nprocs, MPI_Comm slucomm, SuperMatrix *A, int *m_g, int *n_g, int_t *nnz_g, double *rhs, int *ldb, double *x, int *ldx) |
Read the matrix from data file, then distribute it in a distributed CSR format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 7.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -Last update: December 31, 2020 -
int c2f_dcreate_matrix_x_b | -( | -char * | -fname, | -
- | - | int | -nrhs, | -
- | - | int | -nprocs, | -
- | - | MPI_Comm | -slucomm, | -
- | - | SuperMatrix * | -A, | -
- | - | int * | -m_g, | -
- | - | int * | -n_g, | -
- | - | int_t * | -nnz_g, | -
- | - | double * | -rhs, | -
- | - | int * | -ldb, | -
- | - | double * | -x, | -
- | - | int * | -ldx | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file, then distribute it in a distributed CSR format. -More...
--Functions | |
int | c2f_zcreate_matrix_x_b (char *fname, int nrhs, int nprocs, MPI_Comm slucomm, SuperMatrix *A, int *m_g, int *n_g, int_t *nnz_g, doublecomplex *rhs, int *ldb, doublecomplex *x, int *ldx) |
Read the matrix from data file, then distribute it in a distributed CSR format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 7.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -March 15, 2003 -Last update: December 31, 2020 -
int c2f_zcreate_matrix_x_b | -( | -char * | -fname, | -
- | - | int | -nrhs, | -
- | - | int | -nprocs, | -
- | - | MPI_Comm | -slucomm, | -
- | - | SuperMatrix * | -A, | -
- | - | int * | -m_g, | -
- | - | int * | -n_g, | -
- | - | int_t * | -nnz_g, | -
- | - | doublecomplex * | -rhs, | -
- | - | int * | -ldb, | -
- | - | doublecomplex * | -x, | -
- | - | int * | -ldx | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
-Classes | |
struct | factors_dist_t |
-Macros | |
#define | HANDLE_SIZE 8 |
-Functions | |
int | c_fortran_pdgssvx_ABglobal_ (int *iopt, int_t *n, int_t *nnz, int *nrhs, double *values, int_t *rowind, int_t *colptr, double *b, int *ldb, int grid_handle[HANDLE_SIZE], double *berr, int factors[HANDLE_SIZE], int *info) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define HANDLE_SIZE 8 | -
int c_fortran_pdgssvx_ABglobal_ | -( | -int * | -iopt, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nnz, | -
- | - | int * | -nrhs, | -
- | - | double * | -values, | -
- | - | int_t * | -rowind, | -
- | - | int_t * | -colptr, | -
- | - | double * | -b, | -
- | - | int * | -ldb, | -
- | - | int | -grid_handle[HANDLE_SIZE], | -
- | - | double * | -berr, | -
- | - | int | -factors[HANDLE_SIZE], | -
- | - | int * | -info | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
-Macros | |
#define | HANDLE_SIZE 8 |
-Functions | |
void | c_fortran_slugrid_ (int *iopt, MPI_Comm *slu_comm, int *nprow, int *npcol, int grid_handle[HANDLE_SIZE]) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define HANDLE_SIZE 8 | -
void c_fortran_slugrid_ | -( | -int * | -iopt, | -
- | - | MPI_Comm * | -slu_comm, | -
- | - | int * | -nprow, | -
- | - | int * | -npcol, | -
- | - | int | -grid_handle[HANDLE_SIZE] | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
This is the complete list of members for bcastStruct, including all inherited members.
-bcastAlgm | bcastStruct | |
bcastStatus | bcastStruct | |
bcastStruct(MPI_Comm comm_, MPI_Datatype dtype_, collAlg algm) | bcastStruct | |
bcastStruct() | bcastStruct | inline |
buffer | bcastStruct | private |
comm | bcastStruct | |
commSize | bcastStruct | private |
count | bcastStruct | private |
dtype | bcastStruct | |
init(void *buffer, int root, int count) | bcastStruct | |
isFinished() | bcastStruct | |
myRank | bcastStruct | private |
request | bcastStruct | |
root | bcastStruct | private |
status | bcastStruct | |
test() | bcastStruct | |
wait() | bcastStruct |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <commWrapper.hpp>
-Public Member Functions | |
bcastStruct (MPI_Comm comm_, MPI_Datatype dtype_, collAlg algm) | |
bcastStruct () | |
int | init (void *buffer, int root, int count) |
int | test () |
int | wait () |
bool | isFinished () |
-Public Attributes | |
MPI_Request | request |
MPI_Status | status |
MPI_Comm | comm |
MPI_Datatype | dtype |
collAlg | bcastAlgm |
collCommStatus | bcastStatus |
-Private Attributes | |
void * | buffer |
int | root |
int | count |
int | commSize |
int | myRank |
bcastStruct::bcastStruct | -( | -MPI_Comm | -comm_, | -
- | - | MPI_Datatype | -dtype_, | -
- | - | collAlg | -algm | -
- | ) | -- |
-
|
- -inline | -
int bcastStruct::init | -( | -void * | -buffer, | -
- | - | int | -root, | -
- | - | int | -count | -
- | ) | -- |
bool bcastStruct::isFinished | -( | -) | -- |
int bcastStruct::test | -( | -) | -- |
int bcastStruct::wait | -( | -) | -- |
collAlg bcastStruct::bcastAlgm | -
collCommStatus bcastStruct::bcastStatus | -
-
|
- -private | -
MPI_Comm bcastStruct::comm | -
-
|
- -private | -
-
|
- -private | -
MPI_Datatype bcastStruct::dtype | -
-
|
- -private | -
MPI_Request bcastStruct::request | -
-
|
- -private | -
MPI_Status bcastStruct::status | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
This is the complete list of members for lpanel_t, including all inherited members.
-blkPtr(int_t k) | lpanel_t | inline |
blkPtrGPU(int k) | lpanel_t | inline |
blkPtrOffset(int_t k) | lpanel_t | inline |
checkGPU() | lpanel_t | |
copyBackToGPU() | lpanel_t | |
copyFromGPU() | lpanel_t | |
copyToGPU() | lpanel_t | |
copyToGPU(void *basePtr) | lpanel_t | |
diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) | lpanel_t | |
diagFactorCuSolver(int_t k, cusolverDnHandle_t cusolverH, cudaStream_t cuStream, double *dWork, int *dInfo, double *dDiagBuf, int_t LDD, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) | lpanel_t | |
diagFactorPackDiagBlockGPU(int_t k, double *UBlk, int_t LDU, double *DiagLBlk, int_t LDD, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) | lpanel_t | |
find(int_t k) | lpanel_t | |
getEndBlock(int iSt, int maxRows) | lpanel_t | |
gid(int_t k) | lpanel_t | inline |
gpuPanel | lpanel_t | |
haveDiag() | lpanel_t | inline |
index | lpanel_t | |
indexSize() | lpanel_t | inline |
isEmpty() | lpanel_t | inline |
LDA() | lpanel_t | inline |
lpanel_t(int_t k, int_t *lsub, double *nzval, int_t *xsup, int_t isDiagIncluded) | lpanel_t | |
lpanel_t() | lpanel_t | inline |
lpanel_t(int_t *index_, double *val_) | lpanel_t | inline |
lpanel_t(int_t *index_, double *val_, int_t *indexGPU, double *valGPU) | lpanel_t | inline |
nblocks() | lpanel_t | inline |
nbrow(int_t k) | lpanel_t | inline |
ncols() | lpanel_t | inline |
nzrows() | lpanel_t | inline |
nzvalSize() | lpanel_t | inline |
packDiagBlock(double *DiagLBlk, int_t LDD) | lpanel_t | |
panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD) | lpanel_t | |
panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream, int_t ksupsz, double *DiagBlk, int_t LDD) | lpanel_t | |
rowList(int_t k) | lpanel_t | inline |
stRow(int k) | lpanel_t | inline |
totalSize() | lpanel_t | inline |
val | lpanel_t |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <lupanels.hpp>
-Public Member Functions | |
lpanel_t (int_t k, int_t *lsub, double *nzval, int_t *xsup, int_t isDiagIncluded) | |
lpanel_t () | |
lpanel_t (int_t *index_, double *val_) | |
int_t | nblocks () |
int_t | nzrows () |
int_t | haveDiag () |
int_t | ncols () |
int_t | gid (int_t k) |
int_t | nbrow (int_t k) |
int_t | stRow (int k) |
int_t * | rowList (int_t k) |
double * | blkPtr (int_t k) |
size_t | blkPtrOffset (int_t k) |
int_t | LDA () |
int_t | find (int_t k) |
int_t | panelSolve (int_t ksupsz, double *DiagBlk, int_t LDD) |
int_t | diagFactor (int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) |
int_t | packDiagBlock (double *DiagLBlk, int_t LDD) |
int_t | isEmpty () |
int_t | nzvalSize () |
int_t | indexSize () |
size_t | totalSize () |
int | getEndBlock (int iSt, int maxRows) |
lpanelGPU_t | copyToGPU () |
lpanelGPU_t | copyToGPU (void *basePtr) |
int | checkGPU () |
int | copyBackToGPU () |
int_t | panelSolveGPU (cublasHandle_t handle, cudaStream_t cuStream, int_t ksupsz, double *DiagBlk, int_t LDD) |
int_t | diagFactorPackDiagBlockGPU (int_t k, double *UBlk, int_t LDU, double *DiagLBlk, int_t LDD, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) |
int_t | diagFactorCuSolver (int_t k, cusolverDnHandle_t cusolverH, cudaStream_t cuStream, double *dWork, int *dInfo, double *dDiagBuf, int_t LDD, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) |
double * | blkPtrGPU (int k) |
lpanel_t (int_t *index_, double *val_, int_t *indexGPU, double *valGPU) | |
int_t | copyFromGPU () |
-Public Attributes | |
int_t * | index |
double * | val |
lpanelGPU_t | gpuPanel |
lpanel_t::lpanel_t | -( | -int_t | -k, | -
- | - | int_t * | -lsub, | -
- | - | double * | -nzval, | -
- | - | int_t * | -xsup, | -
- | - | int_t | -isDiagIncluded | -
- | ) | -- |
BLOCK DESCRIPTOR (of size LB_DESCRIPTOR) | block number (global) | number of full rows in the block
- -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
int lpanel_t::checkGPU | -( | -) | -- |
int lpanel_t::copyBackToGPU | -( | -) | -- |
int_t lpanel_t::copyFromGPU | -( | -) | -- |
lpanelGPU_t lpanel_t::copyToGPU | -( | -) | -- |
lpanelGPU_t lpanel_t::copyToGPU | -( | -void * | -basePtr | ) | -- |
int_t lpanel_t::diagFactor | -( | -int_t | -k, | -
- | - | double * | -UBlk, | -
- | - | int_t | -LDU, | -
- | - | double | -thresh, | -
- | - | int_t * | -xsup, | -
- | - | superlu_dist_options_t * | -options, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | int * | -info | -
- | ) | -- |
int_t lpanel_t::diagFactorCuSolver | -( | -int_t | -k, | -
- | - | cusolverDnHandle_t | -cusolverH, | -
- | - | cudaStream_t | -cuStream, | -
- | - | double * | -dWork, | -
- | - | int * | -dInfo, | -
- | - | double * | -dDiagBuf, | -
- | - | int_t | -LDD, | -
- | - | double | -thresh, | -
- | - | int_t * | -xsup, | -
- | - | superlu_dist_options_t * | -options, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | int * | -info | -
- | ) | -- |
int_t lpanel_t::diagFactorPackDiagBlockGPU | -( | -int_t | -k, | -
- | - | double * | -UBlk, | -
- | - | int_t | -LDU, | -
- | - | double * | -DiagLBlk, | -
- | - | int_t | -LDD, | -
- | - | double | -thresh, | -
- | - | int_t * | -xsup, | -
- | - | superlu_dist_options_t * | -options, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | int * | -info | -
- | ) | -- |
int lpanel_t::getEndBlock | -( | -int | -iSt, | -
- | - | int | -maxRows | -
- | ) | -- |
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
int_t lpanel_t::packDiagBlock | -( | -double * | -DiagLBlk, | -
- | - | int_t | -LDD | -
- | ) | -- |
int_t lpanel_t::panelSolve | -( | -int_t | -ksupsz, | -
- | - | double * | -DiagBlk, | -
- | - | int_t | -LDD | -
- | ) | -- |
int_t lpanel_t::panelSolveGPU | -( | -cublasHandle_t | -handle, | -
- | - | cudaStream_t | -cuStream, | -
- | - | int_t | -ksupsz, | -
- | - | double * | -DiagBlk, | -
- | - | int_t | -LDD | -
- | ) | -- |
-
|
- -inline | -
-
|
- -inline | -
lpanelGPU_t lpanel_t::gpuPanel | -
int_t* lpanel_t::index | -
double* lpanel_t::val | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
This is the complete list of members for upanel_t, including all inherited members.
-blkPtr(int_t k) | upanel_t | inline |
blkPtrGPU(int k) | upanel_t | inline |
blkPtrOffset(int_t k) | upanel_t | inline |
checkCorrectness() | upanel_t | inline |
checkGPU() | upanel_t | |
colList(int_t k) | upanel_t | inline |
copyBackToGPU() | upanel_t | |
copyFromGPU() | upanel_t | |
copyToGPU() | upanel_t | |
copyToGPU(void *basePtr) | upanel_t | |
diagFactor(int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) | upanel_t | |
find(int_t k) | upanel_t | |
getEndBlock(int jSt, int maxCols) | upanel_t | |
gid(int_t k) | upanel_t | inline |
gpuPanel | upanel_t | |
index | upanel_t | |
indexSize() | upanel_t | inline |
isEmpty() | upanel_t | inline |
LDA() | upanel_t | inline |
nbcol(int_t k) | upanel_t | inline |
nblocks() | upanel_t | inline |
nzcols() | upanel_t | inline |
nzvalSize() | upanel_t | inline |
packed2skyline(int_t k, int_t *usub, double *uval, int_t *xsup) | upanel_t | |
panelSolve(int_t ksupsz, double *DiagBlk, int_t LDD) | upanel_t | |
panelSolveGPU(cublasHandle_t handle, cudaStream_t cuStream, int_t ksupsz, double *DiagBlk, int_t LDD) | upanel_t | |
stCol(int k) | upanel_t | inline |
totalSize() | upanel_t | inline |
upanel_t(int_t k, int_t *usub, double *uval, int_t *xsup) | upanel_t | |
upanel_t() | upanel_t | inline |
upanel_t(int_t *index_, double *val_) | upanel_t | inline |
upanel_t(int_t *index_, double *val_, int_t *indexGPU, double *valGPU) | upanel_t | inline |
val | upanel_t |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <lupanels.hpp>
-Public Member Functions | |
upanel_t (int_t k, int_t *usub, double *uval, int_t *xsup) | |
upanel_t () | |
upanel_t (int_t *index_, double *val_) | |
int_t | nblocks () |
int_t | nzcols () |
int_t | LDA () |
int_t | gid (int_t k) |
int_t | nbcol (int_t k) |
int_t * | colList (int_t k) |
double * | blkPtr (int_t k) |
size_t | blkPtrOffset (int_t k) |
int_t | packed2skyline (int_t k, int_t *usub, double *uval, int_t *xsup) |
int_t | panelSolve (int_t ksupsz, double *DiagBlk, int_t LDD) |
int_t | diagFactor (int_t k, double *UBlk, int_t LDU, double thresh, int_t *xsup, superlu_dist_options_t *options, SuperLUStat_t *stat, int *info) |
int_t | find (int_t k) |
int_t | isEmpty () |
int_t | nzvalSize () |
int_t | indexSize () |
size_t | totalSize () |
int_t | checkCorrectness () |
int_t | stCol (int k) |
int | getEndBlock (int jSt, int maxCols) |
upanelGPU_t | copyToGPU () |
upanelGPU_t | copyToGPU (void *basePtr) |
int | copyBackToGPU () |
int_t | panelSolveGPU (cublasHandle_t handle, cudaStream_t cuStream, int_t ksupsz, double *DiagBlk, int_t LDD) |
int | checkGPU () |
double * | blkPtrGPU (int k) |
upanel_t (int_t *index_, double *val_, int_t *indexGPU, double *valGPU) | |
int_t | copyFromGPU () |
-Public Attributes | |
int_t * | index |
double * | val |
upanelGPU_t | gpuPanel |
upanel_t::upanel_t | -( | -int_t | -k, | -
- | - | int_t * | -usub, | -
- | - | double * | -uval, | -
- | - | int_t * | -xsup | -
- | ) | -- |
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
int upanel_t::checkGPU | -( | -) | -- |
int upanel_t::copyBackToGPU | -( | -) | -- |
int_t upanel_t::copyFromGPU | -( | -) | -- |
upanelGPU_t upanel_t::copyToGPU | -( | -) | -- |
upanelGPU_t upanel_t::copyToGPU | -( | -void * | -basePtr | ) | -- |
int_t upanel_t::diagFactor | -( | -int_t | -k, | -
- | - | double * | -UBlk, | -
- | - | int_t | -LDU, | -
- | - | double | -thresh, | -
- | - | int_t * | -xsup, | -
- | - | superlu_dist_options_t * | -options, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | int * | -info | -
- | ) | -- |
int upanel_t::getEndBlock | -( | -int | -jSt, | -
- | - | int | -maxCols | -
- | ) | -- |
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
-
|
- -inline | -
int_t upanel_t::packed2skyline | -( | -int_t | -k, | -
- | - | int_t * | -usub, | -
- | - | double * | -uval, | -
- | - | int_t * | -xsup | -
- | ) | -- |
int_t upanel_t::panelSolve | -( | -int_t | -ksupsz, | -
- | - | double * | -DiagBlk, | -
- | - | int_t | -LDD | -
- | ) | -- |
int_t upanel_t::panelSolveGPU | -( | -cublasHandle_t | -handle, | -
- | - | cudaStream_t | -cuStream, | -
- | - | int_t | -ksupsz, | -
- | - | double * | -DiagBlk, | -
- | - | int_t | -LDD | -
- | ) | -- |
-
|
- -inline | -
-
|
- -inline | -
upanelGPU_t upanel_t::gpuPanel | -
int_t* upanel_t::index | -
double* upanel_t::val | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
A sparse matrix column ordering algorithm. -More...
--Macros | |
#define | NDEBUG |
#define | PUBLIC |
#define | PRIVATE static |
#define | MAX(a, b) (((a) > (b)) ? (a) : (b)) |
#define | MIN(a, b) (((a) < (b)) ? (a) : (b)) |
#define | ONES_COMPLEMENT(r) (-(r)-1) |
#define | TRUE (1) |
#define | FALSE (0) |
#define | EMPTY (-1) |
#define | ALIVE (0) |
#define | DEAD (-1) |
#define | DEAD_PRINCIPAL (-1) |
#define | DEAD_NON_PRINCIPAL (-2) |
#define | ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) |
#define | ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) |
#define | ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) |
#define | COL_IS_DEAD(c) (Col [c].start < ALIVE) |
#define | COL_IS_ALIVE(c) (Col [c].start >= ALIVE) |
#define | COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) |
#define | KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } |
#define | KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } |
#define | KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } |
#define | PRINTF printf |
#define | INDEX(i) (i) |
#define | DEBUG0(params) ; |
#define | DEBUG1(params) ; |
#define | DEBUG2(params) ; |
#define | DEBUG3(params) ; |
#define | DEBUG4(params) ; |
#define | ASSERT(expression) ((void) 0) |
-Functions | |
PRIVATE int | init_rows_cols (int n_row, int n_col, Colamd_Row Row[], Colamd_Col Col[], int A[], int p[], int stats[COLAMD_STATS]) |
PRIVATE void | init_scoring (int n_row, int n_col, Colamd_Row Row[], Colamd_Col Col[], int A[], int head[], double knobs[COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg) |
PRIVATE int | find_ordering (int n_row, int n_col, int Alen, Colamd_Row Row[], Colamd_Col Col[], int A[], int head[], int n_col2, int max_deg, int pfree) |
PRIVATE void | order_children (int n_col, Colamd_Col Col[], int p[]) |
PRIVATE void | detect_super_cols (Colamd_Col Col[], int A[], int head[], int row_start, int row_length) |
PRIVATE int | garbage_collection (int n_row, int n_col, Colamd_Row Row[], Colamd_Col Col[], int A[], int *pfree) |
PRIVATE int | clear_mark (int n_row, Colamd_Row Row[]) |
PRIVATE void | print_report (char *method, int stats[COLAMD_STATS]) |
PUBLIC int | colamd_recommended (int nnz, int n_row, int n_col) |
PUBLIC void | colamd_set_defaults (double knobs[COLAMD_KNOBS]) |
PUBLIC int | symamd (int n, int A[], int p[], int perm[], double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS], void *(*allocate)(size_t, size_t), void(*release)(void *)) |
PUBLIC int | colamd (int n_row, int n_col, int Alen, int A[], int p[], double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS]) |
PUBLIC void | colamd_report (int stats[COLAMD_STATS]) |
PUBLIC void | symamd_report (int stats[COLAMD_STATS]) |
A sparse matrix column ordering algorithm.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-- ========================================================================== - === colamd/symamd - a sparse matrix column ordering algorithm ============ - ========================================================================== - - - colamd: an approximate minimum degree column ordering algorithm, - for LU factorization of symmetric or unsymmetric matrices, - QR factorization, least squares, interior point methods for - linear programming problems, and other related problems. - - symamd: an approximate minimum degree ordering algorithm for Cholesky - factorization of symmetric matrices. - - Purpose: - - Colamd computes a permutation Q such that the Cholesky factorization of - (AQ)'(AQ) has less fill-in and requires fewer floating point operations - than A'A. This also provides a good ordering for sparse partial - pivoting methods, P(AQ) = LU, where Q is computed prior to numerical - factorization, and P is computed during numerical factorization via - conventional partial pivoting with row interchanges. Colamd is the - column ordering method used in SuperLU, part of the ScaLAPACK library. - It is also available as a built-in function in MATLAB Version 6, - available from MathWorks, Inc. (http://www.mathworks.com). This - routine can be used in place of colmmd in MATLAB. - - Symamd computes a permutation P of a symmetric matrix A such that the - Cholesky factorization of PAP' has less fill-in and requires fewer - floating point operations than A. Symamd constructs a matrix M such - that M'M has the same nonzero pattern of A, and then orders the columns - of M using colmmd. The column ordering of M is then returned as the - row and column ordering P of A. - - Authors: - - The authors of the code itself are Stefan I. Larimore and Timothy A. - Davis (davis@cise.ufl.edu), University of Florida. The algorithm was - developed in collaboration with John Gilbert, Xerox PARC, and Esmond - Ng, Oak Ridge National Laboratory. - - Date: - - September 8, 2003. Version 2.3. 
- - Acknowledgements: - - This work was supported by the National Science Foundation, under - grants DMS-9504974 and DMS-9803599. - - Copyright and License: - - Copyright (c) 1998-2003 by the University of Florida. - All Rights Reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use, copy, modify, and/or distribute - this program, provided that the Copyright, this License, and the - Availability of the original version is retained on all copies and made - accessible to the end-user of any code or package that includes COLAMD - or any modified version of COLAMD. - - Availability: - - The colamd/symamd library is available at - - http://www.cise.ufl.edu/research/sparse/colamd/ - - This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c - file. It requires the colamd.h file. It is required by the colamdmex.c - and symamdmex.c files, for the MATLAB interface to colamd and symamd. - - See the ChangeLog file for changes since Version 1.0. - - ========================================================================== - === Description of user-callable routines ================================ - ========================================================================== - - - ---------------------------------------------------------------------------- - colamd_recommended: - ---------------------------------------------------------------------------- - - C syntax: - - #include "colamd.h" - int colamd_recommended (int nnz, int n_row, int n_col) ; - - or as a C macro - - #include "colamd.h" - Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ; - - Purpose: - - Returns recommended value of Alen for use by colamd. Returns -1 - if any input argument is negative. The use of this routine - or macro is optional. 
Note that the macro uses its arguments - more than once, so be careful for side effects, if you pass - expressions as arguments to COLAMD_RECOMMENDED. Not needed for - symamd, which dynamically allocates its own memory. - - Arguments (all input arguments): - - int nnz ; Number of nonzeros in the matrix A. This must - be the same value as p [n_col] in the call to - colamd - otherwise you will get a wrong value - of the recommended memory to use. - - int n_row ; Number of rows in the matrix A. - - int n_col ; Number of columns in the matrix A. - - ---------------------------------------------------------------------------- - colamd_set_defaults: - ---------------------------------------------------------------------------- - - C syntax: - - #include "colamd.h" - colamd_set_defaults (double knobs [COLAMD_KNOBS]) ; - - Purpose: - - Sets the default parameters. The use of this routine is optional. - - Arguments: - - double knobs [COLAMD_KNOBS] ; Output only. - - Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) - entries are removed prior to ordering. Columns with more than - (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to - ordering, and placed last in the output column ordering. - - Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0]. - Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n) - entries are removed prior to ordering, and placed last in the - output ordering. - - COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1, - respectively, in colamd.h. Default values of these two knobs - are both 0.5. Currently, only knobs [0] and knobs [1] are - used, but future versions may use more knobs. If so, they will - be properly set to their defaults by the future version of - colamd_set_defaults, so that the code that calls colamd will - not need to change, assuming that you either use - colamd_set_defaults, or pass a (double *) NULL pointer as the - knobs array to colamd or symamd. 
- - ---------------------------------------------------------------------------- - colamd: - ---------------------------------------------------------------------------- - - C syntax: - - #include "colamd.h" - int colamd (int n_row, int n_col, int Alen, int *A, int *p, - double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ; - - Purpose: - - Computes a column ordering (Q) of A such that P(AQ)=LU or - (AQ)'AQ=LL' have less fill-in and require fewer floating point - operations than factorizing the unpermuted matrix A or A'A, - respectively. - - Returns: - - TRUE (1) if successful, FALSE (0) otherwise. - - Arguments: - - int n_row ; Input argument. - - Number of rows in the matrix A. - Restriction: n_row >= 0. - Colamd returns FALSE if n_row is negative. - - int n_col ; Input argument. - - Number of columns in the matrix A. - Restriction: n_col >= 0. - Colamd returns FALSE if n_col is negative. - - int Alen ; Input argument. - - Restriction (see note): - Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col - Colamd returns FALSE if these conditions are not met. - - Note: this restriction makes a modest assumption regarding - the size of the two typedef's structures in colamd.h. - We do, however, guarantee that - - Alen >= colamd_recommended (nnz, n_row, n_col) - - or equivalently as a C preprocessor macro: - - Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col) - - will be sufficient. - - int A [Alen] ; Input argument, undefined on output. - - A is an integer array of size Alen. Alen must be at least as - large as the bare minimum value given above, but this is very - low, and can result in excessive run time. For best - performance, we recommend that Alen be greater than or equal to - colamd_recommended (nnz, n_row, n_col), which adds - nnz/5 to the bare minimum value given above. - - On input, the row indices of the entries in column c of the - matrix are held in A [(p [c]) ... (p [c+1]-1)]. 
The row indices - in a given column c need not be in ascending order, and - duplicate row indices may be present. However, colamd will - work a little faster if both of these conditions are met - (Colamd puts the matrix into this format, if it finds that the - conditions are not met). - - The matrix is 0-based. That is, rows are in the range 0 to - n_row-1, and columns are in the range 0 to n_col-1. Colamd - returns FALSE if any row index is out of range. - - The contents of A are modified during ordering, and are - undefined on output. - - int p [n_col+1] ; Both input and output argument. - - p is an integer array of size n_col+1. On input, it holds the - "pointers" for the column form of the matrix A. Column c of - the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first - entry, p [0], must be zero, and p [c] <= p [c+1] must hold - for all c in the range 0 to n_col-1. The value p [n_col] is - thus the total number of entries in the pattern of the matrix A. - Colamd returns FALSE if these conditions are not met. - - On output, if colamd returns TRUE, the array p holds the column - permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is - the first column index in the new ordering, and p [n_col-1] is - the last. That is, p [k] = j means that column j of A is the - kth pivot column, in AQ, where k is in the range 0 to n_col-1 - (p [0] = j means that column j of A is the first column in AQ). - - If colamd returns FALSE, then no permutation is returned, and - p is undefined on output. - - double knobs [COLAMD_KNOBS] ; Input argument. - - See colamd_set_defaults for a description. - - int stats [COLAMD_STATS] ; Output argument. - - Statistics on the ordering, and error status. - See colamd.h for related definitions. - Colamd returns FALSE if stats is not present. - - stats [0]: number of dense or empty rows ignored. 
- - stats [1]: number of dense or empty columns ignored (and - ordered last in the output permutation p) - Note that a row can become "empty" if it - contains only "dense" and/or "empty" columns, - and similarly a column can become "empty" if it - only contains "dense" and/or "empty" rows. - - stats [2]: number of garbage collections performed. - This can be excessively high if Alen is close - to the minimum required value. - - stats [3]: status code. < 0 is an error code. - > 1 is a warning or notice. - - 0 OK. Each column of the input matrix contained - row indices in increasing order, with no - duplicates. - - 1 OK, but columns of input matrix were jumbled - (unsorted columns or duplicate entries). Colamd - had to do some extra work to sort the matrix - first and remove duplicate entries, but it - still was able to return a valid permutation - (return value of colamd was TRUE). - - stats [4]: highest numbered column that - is unsorted or has duplicate - entries. - stats [5]: last seen duplicate or - unsorted row index. - stats [6]: number of duplicate or - unsorted row indices. - - -1 A is a null pointer - - -2 p is a null pointer - - -3 n_row is negative - - stats [4]: n_row - - -4 n_col is negative - - stats [4]: n_col - - -5 number of nonzeros in matrix is negative - - stats [4]: number of nonzeros, p [n_col] - - -6 p [0] is nonzero - - stats [4]: p [0] - - -7 A is too small - - stats [4]: required size - stats [5]: actual size (Alen) - - -8 a column has a negative number of entries - - stats [4]: column with < 0 entries - stats [5]: number of entries in col - - -9 a row index is out of bounds - - stats [4]: column with bad row index - stats [5]: bad row index - stats [6]: n_row, # of rows of matrix - - -10 (unused; see symamd.c) - - -999 (unused; see symamd.c) - - Future versions may return more statistics in the stats array. - - Example: - - See http://www.cise.ufl.edu/research/sparse/colamd/example.c - for a complete example. 
- - To order the columns of a 5-by-4 matrix with 11 nonzero entries in - the following nonzero pattern - - x 0 x 0 - x 0 x x - 0 x x 0 - 0 0 x x - x x 0 0 - - with default knobs and no output statistics, do the following: - - #include "colamd.h" - #define ALEN COLAMD_RECOMMENDED (11, 5, 4) - int A [ALEN] = {0, 1, 4, 2, 4, 0, 1, 2, 3, 1, 3} ; - int p [ ] = {0, 3, 5, 9, 11} ; - int stats [COLAMD_STATS] ; - colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ; - - The permutation is returned in the array p, and A is destroyed. - - ---------------------------------------------------------------------------- - symamd: - ---------------------------------------------------------------------------- - - C syntax: - - #include "colamd.h" - int symamd (int n, int *A, int *p, int *perm, - double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS], - void * (*allocate) (size_t, size_t), void (*release) (void *)) ; - - Purpose: - - The symamd routine computes an ordering P of a symmetric sparse - matrix A such that the Cholesky factorization PAP' = LL' remains - sparse. It is based on a column ordering of a matrix M constructed - so that the nonzero pattern of M'M is the same as A. The matrix A - is assumed to be symmetric; only the strictly lower triangular part - is accessed. You must pass your selected memory allocator (usually - calloc/free or mxCalloc/mxFree) to symamd, for it to allocate - memory for the temporary matrix M. - - Returns: - - TRUE (1) if successful, FALSE (0) otherwise. - - Arguments: - - int n ; Input argument. - - Number of rows and columns in the symmetric matrix A. - Restriction: n >= 0. - Symamd returns FALSE if n is negative. - - int A [nnz] ; Input argument. - - A is an integer array of size nnz, where nnz = p [n]. - - The row indices of the entries in column c of the matrix are - held in A [(p [c]) ... (p [c+1]-1)]. The row indices in a - given column c need not be in ascending order, and duplicate - row indices may be present. 
However, symamd will run faster - if the columns are in sorted order with no duplicate entries. - - The matrix is 0-based. That is, rows are in the range 0 to - n-1, and columns are in the range 0 to n-1. Symamd - returns FALSE if any row index is out of range. - - The contents of A are not modified. - - int p [n+1] ; Input argument. - - p is an integer array of size n+1. On input, it holds the - "pointers" for the column form of the matrix A. Column c of - the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first - entry, p [0], must be zero, and p [c] <= p [c+1] must hold - for all c in the range 0 to n-1. The value p [n] is - thus the total number of entries in the pattern of the matrix A. - Symamd returns FALSE if these conditions are not met. - - The contents of p are not modified. - - int perm [n+1] ; Output argument. - - On output, if symamd returns TRUE, the array perm holds the - permutation P, where perm [0] is the first index in the new - ordering, and perm [n-1] is the last. That is, perm [k] = j - means that row and column j of A is the kth column in PAP', - where k is in the range 0 to n-1 (perm [0] = j means - that row and column j of A are the first row and column in - PAP'). The array is used as a workspace during the ordering, - which is why it must be of length n+1, not just n. - - double knobs [COLAMD_KNOBS] ; Input argument. - - See colamd_set_defaults for a description. - - int stats [COLAMD_STATS] ; Output argument. - - Statistics on the ordering, and error status. - See colamd.h for related definitions. - Symamd returns FALSE if stats is not present. - - stats [0]: number of dense or empty row and columns ignored - (and ordered last in the output permutation - perm). Note that a row/column can become - "empty" if it contains only "dense" and/or - "empty" columns/rows. - - stats [1]: (same as stats [0]) - - stats [2]: number of garbage collections performed. - - stats [3]: status code. < 0 is an error code. - > 1 is a warning or notice. 
- - 0 OK. Each column of the input matrix contained - row indices in increasing order, with no - duplicates. - - 1 OK, but columns of input matrix were jumbled - (unsorted columns or duplicate entries). Symamd - had to do some extra work to sort the matrix - first and remove duplicate entries, but it - still was able to return a valid permutation - (return value of symamd was TRUE). - - stats [4]: highest numbered column that - is unsorted or has duplicate - entries. - stats [5]: last seen duplicate or - unsorted row index. - stats [6]: number of duplicate or - unsorted row indices. - - -1 A is a null pointer - - -2 p is a null pointer - - -3 (unused, see colamd.c) - - -4 n is negative - - stats [4]: n - - -5 number of nonzeros in matrix is negative - - stats [4]: # of nonzeros (p [n]). - - -6 p [0] is nonzero - - stats [4]: p [0] - - -7 (unused) - - -8 a column has a negative number of entries - - stats [4]: column with < 0 entries - stats [5]: number of entries in col - - -9 a row index is out of bounds - - stats [4]: column with bad row index - stats [5]: bad row index - stats [6]: n_row, # of rows of matrix - - -10 out of memory (unable to allocate temporary - workspace for M or count arrays using the - "allocate" routine passed into symamd). - - -999 internal error. colamd failed to order the - matrix M, when it should have succeeded. This - indicates a bug. If this (and *only* this) - error code occurs, please contact the authors. - Don't contact the authors if you get any other - error code. - - Future versions may return more statistics in the stats array. - - void * (*allocate) (size_t, size_t) - - A pointer to a function providing memory allocation. The - allocated memory must be returned initialized to zero. For a - C application, this argument should normally be a pointer to - calloc. For a MATLAB mexFunction, the routine mxCalloc is - passed instead. 
- - void (*release) (void *) - - A pointer to a function that frees memory allocated by the - memory allocation routine above. For a C application, this - argument should normally be a pointer to free. For a MATLAB - mexFunction, the routine mxFree is passed instead. - - - ---------------------------------------------------------------------------- - colamd_report: - ---------------------------------------------------------------------------- - - C syntax: - - #include "colamd.h" - colamd_report (int stats [COLAMD_STATS]) ; - - Purpose: - - Prints the error status and statistics recorded in the stats - array on the standard error output (for a standard C routine) - or on the MATLAB output (for a mexFunction). - - Arguments: - - int stats [COLAMD_STATS] ; Input only. Statistics from colamd. - - - ---------------------------------------------------------------------------- - symamd_report: - ---------------------------------------------------------------------------- - - C syntax: - - #include "colamd.h" - symamd_report (int stats [COLAMD_STATS]) ; - - Purpose: - - Prints the error status and statistics recorded in the stats - array on the standard error output (for a standard C routine) - or on the MATLAB output (for a mexFunction). - - Arguments: - - int stats [COLAMD_STATS] ; Input only. Statistics from symamd. - -
#define ALIVE (0) | -
#define ASSERT | -( | -- | expression | ) | -((void) 0) | -
#define COL_IS_ALIVE | -( | -- | c | ) | -(Col [c].start >= ALIVE) | -
#define COL_IS_DEAD | -( | -- | c | ) | -(Col [c].start < ALIVE) | -
#define COL_IS_DEAD_PRINCIPAL | -( | -- | c | ) | -(Col [c].start == DEAD_PRINCIPAL) | -
#define DEAD (-1) | -
#define DEAD_NON_PRINCIPAL (-2) | -
#define DEAD_PRINCIPAL (-1) | -
#define DEBUG0 | -( | -- | params | ) | -; | -
#define DEBUG1 | -( | -- | params | ) | -; | -
#define DEBUG2 | -( | -- | params | ) | -; | -
#define DEBUG3 | -( | -- | params | ) | -; | -
#define DEBUG4 | -( | -- | params | ) | -; | -
#define EMPTY (-1) | -
#define FALSE (0) | -
#define KILL_NON_PRINCIPAL_COL | -( | -- | c | ) | -{ Col [c].start = DEAD_NON_PRINCIPAL ; } | -
#define KILL_PRINCIPAL_COL | -( | -- | c | ) | -{ Col [c].start = DEAD_PRINCIPAL ; } | -
#define KILL_ROW | -( | -- | r | ) | -{ Row [r].shared2.mark = DEAD ; } | -
#define MAX | -( | -- | a, | -
- | - | - | b | -
- | ) | -(((a) > (b)) ? (a) : (b)) | -
#define MIN | -( | -- | a, | -
- | - | - | b | -
- | ) | -(((a) < (b)) ? (a) : (b)) | -
#define NDEBUG | -
#define ONES_COMPLEMENT | -( | -- | r | ) | -(-(r)-1) | -
#define PRINTF printf | -
#define PRIVATE static | -
#define PUBLIC | -
#define ROW_IS_ALIVE | -( | -- | r | ) | -(Row [r].shared2.mark >= ALIVE) | -
#define ROW_IS_DEAD | -( | -- | r | ) | -ROW_IS_MARKED_DEAD (Row[r].shared2.mark) | -
#define ROW_IS_MARKED_DEAD | -( | -- | row_mark | ) | -(row_mark < ALIVE) | -
#define TRUE (1) | -
PRIVATE int clear_mark | -( | -int | -n_row, | -
- | - | Colamd_Row | -Row[] | -
- | ) | -- |
PUBLIC int colamd | -( | -int | -n_row, | -
- | - | int | -n_col, | -
- | - | int | -Alen, | -
- | - | int | -A[], | -
- | - | int | -p[], | -
- | - | double | -knobs[COLAMD_KNOBS], | -
- | - | int | -stats[COLAMD_STATS] | -
- | ) | -- |
PUBLIC int colamd_recommended | -( | -int | -nnz, | -
- | - | int | -n_row, | -
- | - | int | -n_col | -
- | ) | -- |
PUBLIC void colamd_report | -( | -int | -stats[COLAMD_STATS] | ) | -- |
PUBLIC void colamd_set_defaults | -( | -double | -knobs[COLAMD_KNOBS] | ) | -- |
PRIVATE void detect_super_cols | -( | -Colamd_Col | -Col[], | -
- | - | int | -A[], | -
- | - | int | -head[], | -
- | - | int | -row_start, | -
- | - | int | -row_length | -
- | ) | -- |
PRIVATE int find_ordering | -( | -int | -n_row, | -
- | - | int | -n_col, | -
- | - | int | -Alen, | -
- | - | Colamd_Row | -Row[], | -
- | - | Colamd_Col | -Col[], | -
- | - | int | -A[], | -
- | - | int | -head[], | -
- | - | int | -n_col2, | -
- | - | int | -max_deg, | -
- | - | int | -pfree | -
- | ) | -- |
PRIVATE int garbage_collection | -( | -int | -n_row, | -
- | - | int | -n_col, | -
- | - | Colamd_Row | -Row[], | -
- | - | Colamd_Col | -Col[], | -
- | - | int | -A[], | -
- | - | int * | -pfree | -
- | ) | -- |
PRIVATE int init_rows_cols | -( | -int | -n_row, | -
- | - | int | -n_col, | -
- | - | Colamd_Row | -Row[], | -
- | - | Colamd_Col | -Col[], | -
- | - | int | -A[], | -
- | - | int | -p[], | -
- | - | int | -stats[COLAMD_STATS] | -
- | ) | -- |
PRIVATE void init_scoring | -( | -int | -n_row, | -
- | - | int | -n_col, | -
- | - | Colamd_Row | -Row[], | -
- | - | Colamd_Col | -Col[], | -
- | - | int | -A[], | -
- | - | int | -head[], | -
- | - | double | -knobs[COLAMD_KNOBS], | -
- | - | int * | -p_n_row2, | -
- | - | int * | -p_n_col2, | -
- | - | int * | -p_max_deg | -
- | ) | -- |
PRIVATE void order_children | -( | -int | -n_col, | -
- | - | Colamd_Col | -Col[], | -
- | - | int | -p[] | -
- | ) | -- |
PRIVATE void print_report | -( | -char * | -method, | -
- | - | int | -stats[COLAMD_STATS] | -
- | ) | -- |
PUBLIC int symamd | -( | -int | -n, | -
- | - | int | -A[], | -
- | - | int | -p[], | -
- | - | int | -perm[], | -
- | - | double | -knobs[COLAMD_KNOBS], | -
- | - | int | -stats[COLAMD_STATS], | -
- | - | void *(*)(size_t, size_t) | -allocate, | -
- | - | void(*)(void *) | -release | -
- | ) | -- |
PUBLIC void symamd_report | -( | -int | -stats[COLAMD_STATS] | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Colamd prototypes and definitions. -More...
-#include <stdlib.h>
Go to the source code of this file.
--Classes | |
struct | Colamd_Col_struct |
struct | Colamd_Row_struct |
-Macros | |
#define | COLAMD_KNOBS 20 |
#define | COLAMD_STATS 20 |
#define | COLAMD_DENSE_ROW 0 |
#define | COLAMD_DENSE_COL 1 |
#define | COLAMD_DEFRAG_COUNT 2 |
#define | COLAMD_STATUS 3 |
#define | COLAMD_INFO1 4 |
#define | COLAMD_INFO2 5 |
#define | COLAMD_INFO3 6 |
#define | COLAMD_OK (0) |
#define | COLAMD_OK_BUT_JUMBLED (1) |
#define | COLAMD_ERROR_A_not_present (-1) |
#define | COLAMD_ERROR_p_not_present (-2) |
#define | COLAMD_ERROR_nrow_negative (-3) |
#define | COLAMD_ERROR_ncol_negative (-4) |
#define | COLAMD_ERROR_nnz_negative (-5) |
#define | COLAMD_ERROR_p0_nonzero (-6) |
#define | COLAMD_ERROR_A_too_small (-7) |
#define | COLAMD_ERROR_col_length_negative (-8) |
#define | COLAMD_ERROR_row_index_out_of_bounds (-9) |
#define | COLAMD_ERROR_out_of_memory (-10) |
#define | COLAMD_ERROR_internal_error (-999) |
#define | COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof (int))) |
#define | COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int))) |
#define | COLAMD_RECOMMENDED(nnz, n_row, n_col) |
-Typedefs | |
typedef struct Colamd_Col_struct | Colamd_Col |
typedef struct Colamd_Row_struct | Colamd_Row |
-Functions | |
int | colamd_recommended (int nnz, int n_row, int n_col) |
void | colamd_set_defaults (double knobs[COLAMD_KNOBS]) |
int | colamd (int n_row, int n_col, int Alen, int A[], int p[], double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS]) |
int | symamd (int n, int A[], int p[], int perm[], double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS], void *(*allocate)(size_t, size_t), void(*release)(void *)) |
void | colamd_report (int stats[COLAMD_STATS]) |
void | symamd_report (int stats[COLAMD_STATS]) |
Colamd prototypes and definitions.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-- ========================================================================== - === colamd/symamd prototypes and definitions ============================= - ========================================================================== - - You must include this file (colamd.h) in any routine that uses colamd, - symamd, or the related macros and definitions. - - Authors: - - The authors of the code itself are Stefan I. Larimore and Timothy A. - Davis (davis@cise.ufl.edu), University of Florida. The algorithm was - developed in collaboration with John Gilbert, Xerox PARC, and Esmond - Ng, Oak Ridge National Laboratory. - - Date: - - September 8, 2003. Version 2.3. - - Acknowledgements: - - This work was supported by the National Science Foundation, under - grants DMS-9504974 and DMS-9803599. - - Notice: - - Copyright (c) 1998-2003 by the University of Florida. - All Rights Reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use, copy, modify, and/or distribute - this program, provided that the Copyright, this License, and the - Availability of the original version is retained on all copies and made - accessible to the end-user of any code or package that includes COLAMD - or any modified version of COLAMD. - - Availability: - - The colamd/symamd library is available at - - http://www.cise.ufl.edu/research/sparse/colamd/ - - This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h - file. It is required by the colamd.c, colamdmex.c, and symamdmex.c - files, and by any C code that calls the routines whose prototypes are - listed below, or that uses the colamd/symamd definitions listed below. -
#define COLAMD_C | -( | -- | n_col | ) | -((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof (int))) | -
#define COLAMD_DEFRAG_COUNT 2 | -
#define COLAMD_DENSE_COL 1 | -
#define COLAMD_DENSE_ROW 0 | -
#define COLAMD_ERROR_A_not_present (-1) | -
#define COLAMD_ERROR_A_too_small (-7) | -
#define COLAMD_ERROR_col_length_negative (-8) | -
#define COLAMD_ERROR_internal_error (-999) | -
#define COLAMD_ERROR_ncol_negative (-4) | -
#define COLAMD_ERROR_nnz_negative (-5) | -
#define COLAMD_ERROR_nrow_negative (-3) | -
#define COLAMD_ERROR_out_of_memory (-10) | -
#define COLAMD_ERROR_p0_nonzero (-6) | -
#define COLAMD_ERROR_p_not_present (-2) | -
#define COLAMD_ERROR_row_index_out_of_bounds (-9) | -
#define COLAMD_INFO1 4 | -
#define COLAMD_INFO2 5 | -
#define COLAMD_INFO3 6 | -
#define COLAMD_KNOBS 20 | -
#define COLAMD_OK (0) | -
#define COLAMD_OK_BUT_JUMBLED (1) | -
#define COLAMD_R | -( | -- | n_row | ) | -((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int))) | -
#define COLAMD_RECOMMENDED | -( | -- | nnz, | -
- | - | - | n_row, | -
- | - | - | n_col | -
- | ) | -- |
#define COLAMD_STATS 20 | -
#define COLAMD_STATUS 3 | -
typedef struct Colamd_Col_struct Colamd_Col | -
typedef struct Colamd_Row_struct Colamd_Row | -
int colamd | -( | -int | -n_row, | -
- | - | int | -n_col, | -
- | - | int | -Alen, | -
- | - | int | -A[], | -
- | - | int | -p[], | -
- | - | double | -knobs[COLAMD_KNOBS], | -
- | - | int | -stats[COLAMD_STATS] | -
- | ) | -- |
int colamd_recommended | -( | -int | -nnz, | -
- | - | int | -n_row, | -
- | - | int | -n_col | -
- | ) | -- |
void colamd_report | -( | -int | -stats[COLAMD_STATS] | ) | -- |
void colamd_set_defaults | -( | -double | -knobs[COLAMD_KNOBS] | ) | -- |
int symamd | -( | -int | -n, | -
- | - | int | -A[], | -
- | - | int | -p[], | -
- | - | int | -perm[], | -
- | - | double | -knobs[COLAMD_KNOBS], | -
- | - | int | -stats[COLAMD_STATS], | -
- | - | void *(*)(size_t, size_t) | -allocate, | -
- | - | void(*)(void *) | -release | -
- | ) | -- |
void symamd_report | -( | -int | -stats[COLAMD_STATS] | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "commWrapper.hpp"
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "mpi.h"
Go to the source code of this file.
--Classes | |
class | bcastStruct |
-Enumerations | |
enum | collAlg { SYNC -, ASYNC -, RING -, RINGMOD - } |
enum | collCommStatus { Started -, Finished -, Uninitialized - } |
enum collAlg | -
enum collCommStatus | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Broadcast an array of dtype numbers. -More...
-#include "superlu_ddefs.h"
-Functions | |
void | bcast_tree (void *buf, int count, MPI_Datatype dtype, int root, int tag, gridinfo_t *grid, int scope, int *recvcnt) |
Broadcast an array of dtype numbers.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
void bcast_tree | -( | -void * | -buf, | -
- | - | int | -count, | -
- | - | MPI_Datatype | -dtype, | -
- | - | int | -root, | -
- | - | int | -tag, | -
- | - | gridinfo_t * | -grid, | -
- | - | int | -scope, | -
- | - | int * | -recvcnt | -
- | ) | -- |
-Purpose -======= - Broadcast an array of *dtype* numbers. The communication pattern - is a tree with number of branches equal to NBRANCHES. - The process ranks are between 0 and Np-1. - - The following two pairs of graphs give different ways of viewing the same - algorithm. The first pair shows the trees as they should be visualized - when examining the algorithm. The second pair are isomorphic graphs of - the first, which show the actual pattern of data movement. - Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a - hypercube broadcast (however, it does not require the nodes be a - power of two to work). - - TREE BROADCAST, NBRANCHES = 2 * TREE BROADCAST, NBRANCHES = 3 - - root=2 -i=4 &______________ * - | \ * root=2 -i=2 &______ &______ * i=3 &______________________ - | \ | \ * | \ \ -i=1 &__ &__ &__ &__ * i=1 &______ &______ &__ - | \ | \ | \ | \ * | \ \ | \ \ | \ - 2 3 4 5 6 7 0 1 * 2 3 4 5 6 7 0 1 - - - ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS: - - 2 2 - _________|_________ ___________|____________ - / | \ / | | \ - 6 4 3 5 0 3 4 - / \ | / \ | - 0 7 5 6 7 1 - | - 1 - - -Arguments -========= - -scope --  
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | C_BcTree_Create (C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision) |
void | C_BcTree_Nullify (C_Tree *tree) |
yes_no_t | C_BcTree_IsRoot (C_Tree *tree) |
void | C_BcTree_forwardMessageSimple (C_Tree *tree, void *localBuffer, int msgSize) |
void | C_BcTree_waitSendRequest (C_Tree *tree) |
void | C_RdTree_Create (C_Tree *tree, MPI_Comm comm, int *ranks, int rank_cnt, int msgSize, char precision) |
void | C_RdTree_Nullify (C_Tree *tree) |
yes_no_t | C_RdTree_IsRoot (C_Tree *tree) |
void | C_RdTree_forwardMessageSimple (C_Tree *Tree, void *localBuffer, int msgSize) |
void | C_RdTree_waitSendRequest (C_Tree *Tree) |
void C_BcTree_Create | -( | -C_Tree * | -tree, | -
- | - | MPI_Comm | -comm, | -
- | - | int * | -ranks, | -
- | - | int | -rank_cnt, | -
- | - | int | -msgSize, | -
- | - | char | -precision | -
- | ) | -- |
void C_BcTree_forwardMessageSimple | -( | -C_Tree * | -tree, | -
- | - | void * | -localBuffer, | -
- | - | int | -msgSize | -
- | ) | -- |
void C_BcTree_Nullify | -( | -C_Tree * | -tree | ) | -- |
void C_BcTree_waitSendRequest | -( | -C_Tree * | -tree | ) | -- |
void C_RdTree_Create | -( | -C_Tree * | -tree, | -
- | - | MPI_Comm | -comm, | -
- | - | int * | -ranks, | -
- | - | int | -rank_cnt, | -
- | - | int | -msgSize, | -
- | - | char | -precision | -
- | ) | -- |
void C_RdTree_forwardMessageSimple | -( | -C_Tree * | -Tree, | -
- | - | void * | -localBuffer, | -
- | - | int | -msgSize | -
- | ) | -- |
void C_RdTree_Nullify | -( | -C_Tree * | -tree | ) | -- |
void C_RdTree_waitSendRequest | -( | -C_Tree * | -Tree | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Auxiliary routines to support communication in 3D algorithms. -More...
-#include "superlu_defs.h"
-Functions | |
int_t | Wait_LSend (int_t k, gridinfo_t *grid, int **ToSendR, MPI_Request *send_req, SCT_t *SCT) |
int_t | Wait_USend (MPI_Request *send_req, gridinfo_t *grid, SCT_t *SCT) |
int_t | Check_LRecv (MPI_Request *recv_req, int *msgcnt) |
int_t | Wait_UDiagBlockSend (MPI_Request *U_diag_blk_send_req, gridinfo_t *grid, SCT_t *SCT) |
int_t | Wait_LDiagBlockSend (MPI_Request *L_diag_blk_send_req, gridinfo_t *grid, SCT_t *SCT) |
int_t | Wait_UDiagBlock_Recv (MPI_Request *request, SCT_t *SCT) |
int_t | Test_UDiagBlock_Recv (MPI_Request *request, SCT_t *SCT) |
int_t | Wait_LDiagBlock_Recv (MPI_Request *request, SCT_t *SCT) |
int_t | Test_LDiagBlock_Recv (MPI_Request *request, SCT_t *SCT) |
int | Wait_LUDiagSend (int_t k, MPI_Request *U_diag_blk_send_req, MPI_Request *L_diag_blk_send_req, gridinfo_t *grid, SCT_t *SCT) |
int_t | LDiagBlockRecvWait (int_t k, int_t *factored_U, MPI_Request *L_diag_blk_recv_req, gridinfo_t *grid) |
Auxiliary routines to support communication in 3D algorithms.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 7.0) -- -Lawrence Berkeley National Lab, Oak Ridge National Lab -May 12, 2021 -
int_t Check_LRecv | -( | -MPI_Request * | -recv_req, | -
- | - | int * | -msgcnt | -
- | ) | -- |
int_t LDiagBlockRecvWait | -( | -int_t | -k, | -
- | - | int_t * | -factored_U, | -
- | - | MPI_Request * | -L_diag_blk_recv_req, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
int_t Test_LDiagBlock_Recv | -( | -MPI_Request * | -request, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Test_UDiagBlock_Recv | -( | -MPI_Request * | -request, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Wait_LDiagBlock_Recv | -( | -MPI_Request * | -request, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Wait_LDiagBlockSend | -( | -MPI_Request * | -L_diag_blk_send_req, | -
- | - | gridinfo_t * | -grid, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Wait_LSend | -( | -int_t | -k, | -
- | - | gridinfo_t * | -grid, | -
- | - | int ** | -ToSendR, | -
- | - | MPI_Request * | -send_req, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int Wait_LUDiagSend | -( | -int_t | -k, | -
- | - | MPI_Request * | -U_diag_blk_send_req, | -
- | - | MPI_Request * | -L_diag_blk_send_req, | -
- | - | gridinfo_t * | -grid, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Wait_UDiagBlock_Recv | -( | -MPI_Request * | -request, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Wait_UDiagBlockSend | -( | -MPI_Request * | -U_diag_blk_send_req, | -
- | - | gridinfo_t * | -grid, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int_t Wait_USend | -( | -MPI_Request * | -send_req, | -
- | - | gridinfo_t * | -grid, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Get HWPM, heavy-weight perfect matching. -More...
-#include "CombBLAS/CombBLAS.h"
#include "ApproxWeightPerfectMatching.h"
#include "superlu_ddefs.h"
Go to the source code of this file.
--Macros | |
#define | dHWPM_CombBLAS_hpp |
-Functions | |
void | dGetHWPM (SuperMatrix *A, gridinfo_t *grid, dScalePermstruct_t *ScalePermstruct) |
Get HWPM, heavy-weight perfect matching.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 6.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -April 2, 2020 -
#define dHWPM_CombBLAS_hpp | -
void dGetHWPM | -( | -SuperMatrix * | -A, | -
- | - | gridinfo_t * | -grid, | -
- | - | dScalePermstruct_t * | -ScalePermstruct | -
- | ) | -- |
-Purpose -======= - Get perm_r from HWPM, heavy-weight perfect matching, as a - numerical pivoting permutation. - -Arguments -========= - -A (input) SuperMatrix* - The distributed input matrix A of dimension (A->nrow, A->ncol). - A may be overwritten by diag(R)*A*diag(C)*Pc^T. - The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. - -perm (input) int_t* - Permutation vector describing the transformation performed to - the original matrix A. - -grid (input) gridinfo_t* - SuperLU's 2D process mesh. - -ScalePermstruct (output) dScalePermstruct_t* - ScalePermstruct->perm_r stores the permutation obtained from HWPM. - -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Macros | |
#define | SCHEDULE_STRATEGY guided |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define SCHEDULE_STRATEGY guided | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Macros | |
#define | SCHEDULE_STRATEGY dynamic |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define SCHEDULE_STRATEGY dynamic | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Macros | |
#define | SCHEDULE_STRATEGY dynamic |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define SCHEDULE_STRATEGY dynamic | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Get heavy-weight perfect matching (HWPM). -More...
--Functions | |
int | d_c2cpp_GetHWPM (SuperMatrix *A, gridinfo_t *grid, dScalePermstruct_t *ScalePermstruct) |
Get heavy-weight perfect matching (HWPM).
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 5.4) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -April 1, 2018 -Modified: April 2, 2020 -
int d_c2cpp_GetHWPM | -( | -SuperMatrix * | -A, | -
- | - | gridinfo_t * | -grid, | -
- | - | dScalePermstruct_t * | -ScalePermstruct | -
- | ) | -- |
-Purpose -======= - -Get heavy-weight perfect matching (HWPM). - -Reference: - - -Arguments -========= - -A (input) SuperMatrix* - The distributed input matrix A of dimension (A->nrow, A->ncol). - The type of A can be: Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE. - -grid (input) gridinfo_t* - SuperLU's 2D process mesh. - -ScalePermstruct (output) dScalePermstruct_t* - ScalePermstruct->perm_r stores the permutation obtained from HWPM. - -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
-Functions | |
int | dread_binary (FILE *fp, int_t *m, int_t *n, int_t *nnz, double **nzval, int_t **rowind, int_t **colptr) |
int | dwrite_binary (int_t n, int_t nnz, double *values, int_t *rowind, int_t *colptr) |
int dread_binary | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nnz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Header for dcomplex.c. -More...
-Go to the source code of this file.
--Classes | |
struct | doublecomplex |
-Macros | |
#define | z_copy(c, a) |
Complex Copy c = a. More... | |
#define | z_add(c, a, b) |
Complex Addition c = a + b. More... | |
#define | z_sub(c, a, b) |
Complex Subtraction c = a - b. More... | |
#define | zd_mult(c, a, b) |
Complex-Double Multiplication. More... | |
#define | zz_mult(c, a, b) |
Complex-Complex Multiplication. More... | |
#define | z_eq(a, b) ( (a)->r == (b)->r && (a)->i == (b)->i ) |
Complex equality testing. More... | |
-Functions | |
void | slud_z_div (doublecomplex *, doublecomplex *, doublecomplex *) |
double | slud_z_abs (doublecomplex *) |
double | slud_z_abs1 (doublecomplex *) |
Header for dcomplex.c.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
#define z_add | -( | -- | c, | -
- | - | - | a, | -
- | - | - | b | -
- | ) | -- |
#define z_copy | -( | -- | c, | -
- | - | - | a | -
- | ) | -- |
Complex Copy c = a.
- -#define z_eq | -( | -- | a, | -
- | - | - | b | -
- | ) | -( (a)->r == (b)->r && (a)->i == (b)->i ) | -
Complex equality testing.
- -#define z_sub | -( | -- | c, | -
- | - | - | a, | -
- | - | - | b | -
- | ) | -- |
#define zd_mult | -( | -- | c, | -
- | - | - | a, | -
- | - | - | b | -
- | ) | -- |
Complex-Double Multiplication.
- -#define zz_mult | -( | -- | c, | -
- | - | - | a, | -
- | - | - | b | -
- | ) | -- |
double slud_z_abs | -( | -doublecomplex * | -z | ) | -- |
double slud_z_abs1 | -( | -doublecomplex * | -z | ) | -- |
void slud_z_div | -( | -doublecomplex * | -c, | -
- | - | doublecomplex * | -a, | -
- | - | doublecomplex * | -b | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Defines common arithmetic operations for complex type. -More...
--Functions | |
void | slud_z_div (doublecomplex *c, doublecomplex *a, doublecomplex *b) |
double | slud_z_abs (doublecomplex *z) |
double | slud_z_abs1 (doublecomplex *z) |
Defines common arithmetic operations for complex type.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
double slud_z_abs | -( | -doublecomplex * | -z | ) | -- |
double slud_z_abs1 | -( | -doublecomplex * | -z | ) | -- |
void slud_z_div | -( | -doublecomplex * | -c, | -
- | - | doublecomplex * | -a, | -
- | - | doublecomplex * | -b | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Distribute the input matrix in a distributed compressed row format. -More...
--Functions | |
int | dcreate_dist_matrix (SuperMatrix *A, int_t m, int_t n, int_t nnz, double *nzval_g, int_t *rowind_g, int_t *colptr_g, gridinfo_t *grid) |
Distribute the input matrix in a distributed compressed row format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 3.2) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -October 2012 - - -Purpose -======= - -DCREATE_DIST_MATRIX reads the global matrix from three input arrays -and distributes it to the processes in a distributed compressed row format. - -Arguments -========= - -A (output) SuperMatrix* - Local matrix A in NR_loc format. - -M (input) int_t - The row number of the global matrix. - -N (input) int_t - The col number of the global matrix. - -NNZ (input) int_t - The number of nonzeros in the global matrix. - -NZVAL_G (input) double* - Nonzero values of the global matrix. - -ROWIND_G (input) int_t* - Row indices of the global matrix. - -COLPTR_G (input) int_t* - Column pointers of the global matrix. - -GRID (input) gridinfo_t* - The 2D process mesh. - -  
int dcreate_dist_matrix | -( | -SuperMatrix * | -A, | -
- | - | int_t | -m, | -
- | - | int_t | -n, | -
- | - | int_t | -nnz, | -
- | - | double * | -nzval_g, | -
- | - | int_t * | -rowind_g, | -
- | - | int_t * | -colptr_g, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file. -More...
--Functions | |
int | dcreate_matrix_postfix3d (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo3d_t *grid3d) |
Read the matrix from data file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 7.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley, -Oak Ridge National Lab. -May 12, 2021 -July 15, 2022 -
int dcreate_matrix_postfix3d | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | double ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | double ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | char * | -postfix, | -
- | - | gridinfo3d_t * | -grid3d | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read the matrix from data file. -More...
--Functions | |
int | dcreate_matrix_perturbed (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, gridinfo_t *grid) |
int | dcreate_matrix_perturbed_postfix (SuperMatrix *A, int nrhs, double **rhs, int *ldb, double **x, int *ldx, FILE *fp, char *postfix, gridinfo_t *grid) |
Read the matrix from data file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 5.1.3) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -December 31, 2016 -
int dcreate_matrix_perturbed | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | double ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | double ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
int dcreate_matrix_perturbed_postfix | -( | -SuperMatrix * | -A, | -
- | - | int | -nrhs, | -
- | - | double ** | -rhs, | -
- | - | int * | -ldb, | -
- | - | double ** | -x, | -
- | - | int * | -ldx, | -
- | - | FILE * | -fp, | -
- | - | char * | -postfix, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Distribute the matrix onto the 2D process mesh. -More...
--Functions | |
float | ddistribute (superlu_dist_options_t *options, int_t n, SuperMatrix *A, Glu_freeable_t *Glu_freeable, dLUstruct_t *LUstruct, gridinfo_t *grid) |
Distribute the matrix onto the 2D process mesh.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.3) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -October 15, 2008 -
float ddistribute | -( | -superlu_dist_options_t * | -options, | -
- | - | int_t | -n, | -
- | - | SuperMatrix * | -A, | -
- | - | Glu_freeable_t * | -Glu_freeable, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
-Purpose -======= - Distribute the matrix onto the 2D process mesh. - -Arguments -========= - -options (input) superlu_dist_options_t * - options->Fact specifies whether or not the L and U structures will be re-used. - = SamePattern_SameRowPerm: L and U structures are input, and - unchanged on exit. - = DOFACT or SamePattern: L and U structures are computed and output. - -n (input) int - Dimension of the matrix. - -A (input) SuperMatrix* - The original matrix A, permuted by columns, of dimension - (A->nrow, A->ncol). The type of A can be: - Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. - -LUstruct (input) dLUstruct_t* - Data structures for L and U factors. - -grid (input) gridinfo_t* - The 2D process mesh. - -Return value -============ - > 0, working storage (in bytes) required to perform redistribution. - (excluding LU factor size) -  
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Computes row and column scalings. -More...
--Functions | |
void | dgsequ_dist (SuperMatrix *A, double *r, double *c, double *rowcnd, double *colcnd, double *amax, int_t *info) |
Computes row and column scalings.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void dgsequ_dist | -( | -SuperMatrix * | -A, | -
- | - | double * | -r, | -
- | - | double * | -c, | -
- | - | double * | -rowcnd, | -
- | - | double * | -colcnd, | -
- | - | double * | -amax, | -
- | - | int_t * | -info | -
- | ) | -- |
- Purpose - ======= - - DGSEQU_dist computes row and column scalings intended to equilibrate an - M-by-N sparse matrix A and reduce its condition number. R returns the row - scale factors and C the column scale factors, chosen to try to make - the largest element in each row and column of the matrix B with - elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. - - R(i) and C(j) are restricted to be between SMLNUM = smallest safe - number and BIGNUM = largest safe number. Use of these scaling - factors is not guaranteed to reduce the condition number of A but - works well in practice. - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input) SuperMatrix* - The matrix of dimension (A->nrow, A->ncol) whose equilibration - factors are to be computed. The type of A can be: - Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. - - R (output) double*, size A->nrow - If INFO = 0 or INFO > M, R contains the row scale factors - for A. - - C (output) double*, size A->ncol - If INFO = 0, C contains the column scale factors for A. - - ROWCND (output) double* - If INFO = 0 or INFO > M, ROWCND contains the ratio of the - smallest R(i) to the largest R(i). If ROWCND >= 0.1 and - AMAX is neither too large nor too small, it is not worth - scaling by R. - - COLCND (output) double* - If INFO = 0, COLCND contains the ratio of the smallest - C(i) to the largest C(i). If COLCND >= 0.1, it is not - worth scaling by C. - - AMAX (output) double* - Absolute value of largest matrix element. If AMAX is very - close to overflow or very close to underflow, the matrix - should be scaled. - - INFO (output) int* - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, and i is - <= M: the i-th row of A is exactly zero - > M: the (i-M)-th column of A is exactly zero - - ===================================================================== -
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Fortran code for reading a sparse matrix in Harwell-Boeing format. -More...
--Functions/Subroutines | |
subroutine | dhbcode1 (nrow, ncol, nnzero, values, rowind, colptr) |
Fortran code for reading a sparse matrix in Harwell-Boeing format.
-subroutine dhbcode1 | -( | -integer | -nrow, | -
- | - | integer | -ncol, | -
- | - | integer | -nnzero, | -
- | - | real*8, dimension (*) | -values, | -
- | - | integer, dimension (*) | -rowind, | -
- | - | integer, dimension (*) | -colptr | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
File in SRC | Includes file in SRC/TRF3dV100 |
---|---|
pdgssvx3d.c | superlu_summit.h |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | dispatch_histogram.cuh |
file | dispatch_radix_sort.cuh |
file | dispatch_reduce.cuh |
file | dispatch_reduce_by_key.cuh |
file | dispatch_rle.cuh |
file | dispatch_scan.cuh |
file | dispatch_select_if.cuh |
file | dispatch_spmv_orig.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | agent_histogram.cuh |
file | agent_radix_sort_downsweep.cuh |
file | agent_radix_sort_upsweep.cuh |
file | agent_reduce.cuh |
file | agent_reduce_by_key.cuh |
file | agent_rle.cuh |
file | agent_scan.cuh |
file | agent_segment_fixup.cuh |
file | agent_select_if.cuh |
file | agent_spmv_orig.cuh |
file | single_pass_scan_operators.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Directories | |
directory | cub |
directory | TRF3dV100 |
-Files | |
file | acc_aux.c |
file | acc_aux.h [code] |
file | colamd.c |
A sparse matrix column ordering algorithm. | |
file | colamd.h [code] |
Colamd prototypes and definitions. | |
file | comm.c |
Broadcast an array of dtype numbers. | |
file | comm_tree.c |
file | communication_aux.c |
Auxiliary routines to support communication in 3D algorithms. | |
file | d_c2cpp_GetHWPM.cpp |
Get heavy-weight perfect matching (HWPM). | |
file | dbinary_io.c |
file | dcommunication_aux.c |
file | dcomplex.h [code] |
Header for dcomplex.c. | |
file | dcomplex_dist.c |
Defines common arithmetic operations for complex type. | |
file | ddistribute.c |
Distribute the matrix onto the 2D process mesh. | |
file | dgather.c |
file | dgsequ_dist.c |
Computes row and column scalings. | |
file | dHWPM_CombBLAS.hpp [code] |
Get HWPM, heavy-weight perfect matching. | |
file | dlangs_dist.c |
Returns the value of the one norm, the infinity norm, or the element of largest value. | |
file | dlaqgs_dist.c |
Equilibrates a general sparse M by N matrix A. | |
file | dldperm_dist.c |
Finds a row permutation so that the matrix has large entries on the diagonal. | |
file | dlook_ahead_update.c |
file | dlustruct_gpu.h [code] |
Descriptions and declarations for structures used in GPU. | |
file | dmach_dist.c |
file | dmemory_dist.c |
Memory utilities. | |
file | dmyblas2_dist.c |
Level 2 BLAS operations: solves and matvec, written in C. | |
file | dnrformat_loc3d.c |
file | dreadhb.c |
Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format. | |
file | dreadMM.c |
Contributed by Francois-Henry Rouet. | |
file | dreadrb.c |
Read a matrix stored in Rutherford-Boeing format. | |
file | dreadtriple.c |
file | dreadtriple_noheader.c |
file | dscatter.c |
file | dscatter3d.c |
file | dSchCompUdt-2Ddynamic.c |
file | dSchCompUdt-gpu.c |
file | dSchCompUdt-gpuOffload-alt.c |
file | dsp_blas2_dist.c |
Sparse BLAS 2, using some dense BLAS 2 operations. | |
file | dsp_blas3_dist.c |
Sparse BLAS3, using some dense BLAS3 operations. | |
file | dstatic_schedule.c |
Performs static scheduling for the look-ahead factorization algorithm. | |
file | dsuperlu_blas.c |
file | dsuperlu_gpu.cu |
file | dsuperlu_gpu.hip.cpp |
file | dtreeFactorization.c |
file | dtreeFactorizationGPU.c |
Factorization routines for the subtree using 2D process grid, with GPUs. | |
file | dtrfAux.c |
file | dtrfCommWrapper.c |
file | dutil_dist.c |
Several matrix utilities. | |
file | etree.c |
Elimination tree computation and layout routines. | |
file | get_perm_c.c |
Gets matrix permutation. | |
file | get_perm_c_parmetis.c |
Gets matrix permutation. | |
file | gpu_api_utils.c |
file | gpu_api_utils.h [code] |
file | gpu_wrapper.h [code] |
Wrappers for multiple types of GPUs. | |
file | html_mainpage.h [code] |
file | machines.h [code] |
These macros define which machine will be used. | |
file | mc64ad_dist.c |
Permute large entries to the main diagonal. | |
file | memory.c |
Memory utilities. | |
file | mmd.c |
Implements the minimum degree algorithm. | |
file | old_colamd.c |
An approximate minimum degree column ordering algorithm. | |
file | old_colamd.h [code] |
colamd include file | |
file | pd3dcomm.c |
file | pddistribute.c |
Re-distribute A on the 2D process mesh. | |
file | pdGetDiagU.c |
file | pdgsequ.c |
Computes row and column scalings. | |
file | pdgsmv.c |
Parallel sparse matrix-vector multiplication. | |
file | pdgsmv_AXglobal.c |
Performs sparse matrix-vector multiplication. | |
file | pdgsrfs.c |
Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates. | |
file | pdgsrfs_ABXglobal.c |
Improves the computed solution and provides error bounds. | |
file | pdgssvx.c |
Solves a system of linear equations A*X=B. | |
file | pdgssvx3d.c |
Solves a system of linear equations A*X=B using 3D process grid. | |
file | pdgssvx_ABglobal.c |
Solves a system of linear equations A*X=B. | |
file | pdgstrf.c |
Performs LU factorization in parallel. | |
file | pdgstrf2.c |
Performs panel LU factorization. | |
file | pdgstrf3d.c |
Performs LU factorization in 3D process grid. | |
file | pdgstrf_irecv.c |
Performs LU factorization in parallel. | |
file | pdgstrf_sherry.c |
file | pdgstrf_X1.c |
Performs the LU factorization in parallel. | |
file | pdgstrs.c |
Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factors computed previously. | |
file | pdgstrs1.c |
Solves a system of distributed linear equations. | |
file | pdgstrs_Bglobal.c |
Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization. | |
file | pdgstrs_Bglobal_Bsend.c |
Solves a system of distributed linear equations. | |
file | pdgstrs_lsum.c |
Perform local block modifications: lsum[i] -= L_i,k * X[k]. | |
file | pdgstrs_lsum_cuda.cu |
file | pdgstrs_lsum_cuda.hip.cpp |
file | pdgstrsL.c |
Solves a lower triangular system L*X = B, with L being the lower triangular factor computed previously by PDGSTRF. | |
file | pdlangs.c |
Returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest value. | |
file | pdlaqgs.c |
Equilibrates a general sparse M by N matrix. | |
file | pdsymbfact_distdata.c |
Redistribute the symbolic structure of L and U from the distribution. | |
file | pdutil.c |
Several matrix utilities. | |
file | ps3dcomm.c |
file | psdistribute.c |
Re-distribute A on the 2D process mesh. | |
file | psGetDiagU.c |
file | psgsequ.c |
Computes row and column scalings. | |
file | psgsequb.c |
Computes row and column scalings, restricting the scale factors to be power-of-radix. | |
file | psgsmv.c |
Parallel sparse matrix-vector multiplication. | |
file | psgsmv_AXglobal.c |
Performs sparse matrix-vector multiplication. | |
file | psgsmv_d2.c |
Parallel sparse matrix-vector multiplication. | |
file | psgsrfs.c |
Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates. | |
file | psgsrfs_ABXglobal.c |
Improves the computed solution and provides error bounds. | |
file | psgsrfs_d2.c |
Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates. | |
file | psgssvx.c |
Solves a system of linear equations A*X=B. | |
file | psgssvx3d.c |
Solves a system of linear equations A*X=B using 3D process grid. | |
file | psgssvx_ABglobal.c |
Solves a system of linear equations A*X=B. | |
file | psgssvx_d2.c |
Solves a system of linear equations A*X=B. | |
file | psgstrf.c |
Performs LU factorization in parallel. | |
file | psgstrf2.c |
Performs panel LU factorization. | |
file | psgstrf3d.c |
Performs LU factorization in 3D process grid. | |
file | psgstrs.c |
Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factors computed previously. | |
file | psgstrs1.c |
Solves a system of distributed linear equations. | |
file | psgstrs_Bglobal.c |
Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization. | |
file | psgstrs_lsum.c |
Perform local block modifications: lsum[i] -= L_i,k * X[k]. | |
file | pslangs.c |
Returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest value. | |
file | pslaqgs.c |
Equilibrates a general sparse M by N matrix. | |
file | pssymbfact_distdata.c |
Redistribute the symbolic structure of L and U from the distribution. | |
file | psutil.c |
Several matrix utilities. | |
file | psymbfact.c |
Implements parallel symbolic factorization. | |
file | psymbfact.h [code] |
Definitions for parallel symbolic factorization routine. | |
file | psymbfact_util.c |
Utilities for parallel symbolic factorization routine. | |
file | pxerr_dist.c |
file | pz3dcomm.c |
file | pzdistribute.c |
Re-distribute A on the 2D process mesh. | |
file | pzGetDiagU.c |
file | pzgsequ.c |
Computes row and column scalings. | |
file | pzgsmv.c |
Parallel sparse matrix-vector multiplication. | |
file | pzgsmv_AXglobal.c |
Performs sparse matrix-vector multiplication. | |
file | pzgsrfs.c |
Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates. | |
file | pzgsrfs_ABXglobal.c |
Improves the computed solution and provides error bounds. | |
file | pzgssvx.c |
Solves a system of linear equations A*X=B. | |
file | pzgssvx3d.c |
Solves a system of linear equations A*X=B using 3D process grid. | |
file | pzgssvx_ABglobal.c |
Solves a system of linear equations A*X=B. | |
file | pzgstrf.c |
Performs LU factorization in parallel. | |
file | pzgstrf2.c |
Performs panel LU factorization. | |
file | pzgstrf3d.c |
Performs LU factorization in 3D process grid. | |
file | pzgstrf_irecv.c |
Performs LU factorization in parallel. | |
file | pzgstrs.c |
Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factors computed previously. | |
file | pzgstrs1.c |
Solves a system of distributed linear equations. | |
file | pzgstrs_Bglobal.c |
Solves a system of distributed linear equations A*X = B with a general N-by-N matrix A using the LU factorization. | |
file | pzgstrs_lsum.c |
Perform local block modifications: lsum[i] -= L_i,k * X[k]. | |
file | pzlangs.c |
Returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest value. | |
file | pzlaqgs.c |
Equilibrates a general sparse M by N matrix. | |
file | pzsymbfact_distdata.c |
Redistribute the symbolic structure of L and U from the distribution. | |
file | pzutil.c |
Several matrix utilities. | |
file | s_c2cpp_GetHWPM.cpp |
Get heavy-weight perfect matching (HWPM). | |
file | sbinary_io.c |
file | scan.cu |
file | scatter.c |
file | scatter.h [code] |
file | scommunication_aux.c |
file | sdistribute.c |
Distribute the matrix onto the 2D process mesh. | |
file | sec_structs.c |
Auxiliary routines in 3D algorithms. | |
file | sgather.c |
file | sgsequ_dist.c |
Computes row and column scalings. | |
file | slangs_dist.c |
Returns the value of the one norm, the infinity norm, or the element of largest value. Modified from SuperLU routine SLANGS. | |
file | slaqgs_dist.c |
Equilibrates a general sparse matrix. | |
file | sldperm_dist.c |
Finds a row permutation so that the matrix has large entries on the diagonal. | |
file | slook_ahead_update.c |
file | slustruct_gpu.h [code] |
Descriptions and declarations for structures used in GPU. | |
file | smach_dist.c |
file | smemory_dist.c |
Memory utilities. | |
file | smyblas2_dist.c |
Level 2 BLAS operations: solves and matvec, written in C. | |
file | snrformat_loc3d.c |
file | sp_colorder.c |
Permutes the columns of the original matrix. | |
file | sp_ienv.c |
Chooses machine-dependent parameters for the local environment. | |
file | sreadhb.c |
Read a FLOAT PRECISION matrix stored in Harwell-Boeing format. | |
file | sreadMM.c |
Contributed by Francois-Henry Rouet. | |
file | sreadrb.c |
Read a matrix stored in Rutherford-Boeing format. | |
file | sreadtriple.c |
file | sreadtriple_noheader.c |
file | sscatter.c |
file | sscatter3d.c |
file | sSchCompUdt-2Ddynamic.c |
file | sSchCompUdt-cuda.c |
file | sSchCompUdt-gpu.c |
file | ssp_blas2_dist.c |
Sparse BLAS 2, using some dense BLAS 2 operations. | |
file | ssp_blas3_dist.c |
Sparse BLAS3, using some dense BLAS3 operations. | |
file | sstatic_schedule.c |
Performs static scheduling for the look-ahead factorization algorithm. | |
file | ssuperlu_blas.c |
file | ssuperlu_gpu.cu |
file | ssuperlu_gpu.hip.cpp |
file | streeFactorization.c |
file | streeFactorizationGPU.c |
Factorization routines for the subtree using 2D process grid, with GPUs. | |
file | strfAux.c |
file | strfCommWrapper.c |
file | superlu_ddefs.h [code] |
Distributed SuperLU data types and function prototypes. | |
file | superlu_defs.h [code] |
Definitions which are precision-neutral. | |
file | superlu_dist_config.h [code] |
file | superlu_dist_version.c |
file | superlu_enum_consts.h [code] |
enum constants header file | |
file | superlu_FCnames.h [code] |
Macro definitions. | |
file | superlu_FortranCInterface.h [code] |
file | superlu_gpu.cu |
file | superlu_gpu_utils.cu |
file | superlu_gpu_utils.hip.cpp |
file | superlu_grid.c |
SuperLU grid utilities. | |
file | superlu_grid3d.c |
SuperLU grid utilities. | |
file | superlu_sdefs.h [code] |
Distributed SuperLU data types and function prototypes. | |
file | superlu_timer.c |
Returns the time in seconds used by the process. | |
file | superlu_zdefs.h [code] |
Distributed SuperLU data types and function prototypes. | |
file | supermatrix.h [code] |
Matrix type definitions. | |
file | supernodal_etree.c |
function to generate supernodal etree | |
file | supernodalForest.c |
SuperLU utilities. | |
file | sutil_dist.c |
Several matrix utilities. | |
file | symbfact.c |
Performs a symbolic factorization. | |
file | treeFactorization.c |
factorization routines in 3D algorithms | |
file | treeFactorizationGPU.c |
file | TreeInterface.cpp |
file | trfAux.c |
Auxiliary routines to support 3D algorithms. | |
file | util.c |
Utilities functions. | |
file | util_dist.h [code] |
Header for utilities. | |
file | wingetopt.c |
file | wingetopt.h [code] |
file | xerr_dist.c |
file | z_c2cpp_GetHWPM.cpp |
Get heavy-weight perfect matching (HWPM). | |
file | zbinary_io.c |
file | zcommunication_aux.c |
file | zdistribute.c |
Distribute the matrix onto the 2D process mesh. | |
file | zgather.c |
file | zgsequ_dist.c |
Computes row and column scalings. | |
file | zHWPM_CombBLAS.hpp [code] |
Get HWPM, heavy-weight perfect matching. | |
file | zlangs_dist.c |
Returns the one norm, or the Frobenius norm, or the infinity norm, or the element of largest value. | |
file | zlaqgs_dist.c |
Equilibrates a general sparse M by N matrix A. | |
file | zldperm_dist.c |
Finds a row permutation so that the matrix has large entries on the diagonal. | |
file | zlook_ahead_update.c |
file | zlustruct_gpu.h [code] |
Descriptions and declarations for structures used in GPU. | |
file | zmemory_dist.c |
Memory utilities. | |
file | zmyblas2_dist.c |
Level 2 BLAS operations: solves and matvec, written in C. | |
file | znrformat_loc3d.c |
file | zreadhb.c |
Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format. | |
file | zreadMM.c |
Contributed by Francois-Henry Rouet. | |
file | zreadrb.c |
Read a matrix stored in Rutherford-Boeing format. | |
file | zreadtriple.c |
file | zreadtriple_noheader.c |
file | zscatter.c |
file | zscatter3d.c |
file | zSchCompUdt-2Ddynamic.c |
file | zSchCompUdt-gpu.c |
file | zsp_blas2_dist.c |
Sparse BLAS 2, using some dense BLAS 2 operations. | |
file | zsp_blas3_dist.c |
Sparse BLAS3, using some dense BLAS3 operations. | |
file | zstatic_schedule.c |
Performs static scheduling for the look-ahead factorization algorithm. | |
file | zsuperlu_blas.c |
file | zsuperlu_gpu.cu |
file | zsuperlu_gpu.hip.cpp |
file | ztreeFactorization.c |
file | ztreeFactorizationGPU.c |
Factorization routines for the subtree using 2D process grid, with GPUs. | |
file | ztrfAux.c |
file | ztrfCommWrapper.c |
file | zutil_dist.c |
Several matrix utilities. | |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | dcreate_matrix.c |
Read the matrix from data file. | |
file | dcreate_matrix3d.c |
Read the matrix from data file. | |
file | dcreate_matrix_perturbed.c |
Read the matrix from data file. | |
file | dnrformat_loc3d.c |
file | dreadhb.c |
Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format. | |
file | dreadtriple.c |
file | pddrive.c |
Driver program for PDGSSVX example. | |
file | pddrive1.c |
Driver program for PDGSSVX example. | |
file | pddrive1_ABglobal.c |
Driver program for pdgssvx_ABglobal example. | |
file | pddrive2.c |
Driver program for PDGSSVX example. | |
file | pddrive2_ABglobal.c |
Driver program for pdgssvx_ABglobal example. | |
file | pddrive3.c |
Driver program for PDGSSVX example. | |
file | pddrive3_ABglobal.c |
Driver program for pdgssvx_ABglobal example. | |
file | pddrive3d.c |
Driver program for PDGSSVX3D example. | |
file | pddrive3d1.c |
Driver program for PDGSSVX3D example. | |
file | pddrive3d2.c |
Driver program for PDGSSVX3D example. | |
file | pddrive3d3.c |
Driver program for PDGSSVX3D example. | |
file | pddrive4.c |
This example illustrates how to divide up the processes into subgroups. | |
file | pddrive4_ABglobal.c |
This example illustrates how to divide up the processes into subgroups. | |
file | pddrive_ABglobal.c |
Driver program for pdgssvx_ABglobal example. | |
file | pddrive_spawn.c |
Driver program for PDGSSVX example. | |
file | psdrive.c |
Driver program for PSGSSVX example. | |
file | psdrive1.c |
Driver program for PSGSSVX example. | |
file | psdrive1_ABglobal.c |
Driver program for psgssvx_ABglobal example. | |
file | psdrive2.c |
Driver program for PSGSSVX example. | |
file | psdrive2_ABglobal.c |
Driver program for psgssvx_ABglobal example. | |
file | psdrive3.c |
Driver program for PSGSSVX example. | |
file | psdrive3_ABglobal.c |
Driver program for psgssvx_ABglobal example. | |
file | psdrive3d.c |
Driver program for PSGSSVX3D example. | |
file | psdrive3d1.c |
Driver program for PSGSSVX3D example. | |
file | psdrive3d2.c |
Driver program for PSGSSVX3D example. | |
file | psdrive3d3.c |
Driver program for PSGSSVX3D example. | |
file | psdrive4.c |
This example illustrates how to divide up the processes into subgroups. | |
file | psdrive4_ABglobal.c |
This example illustrates how to divide up the processes into subgroups. | |
file | psdrive_ABglobal.c |
Driver program for psgssvx_ABglobal example. | |
file | psgsrfs_tracking.c |
Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates. | |
file | psgssvx_tracking.c |
Solves a system of linear equations A*X=B. | |
file | pzdrive.c |
Driver program for PZGSSVX example. | |
file | pzdrive1.c |
Driver program for PZGSSVX example. | |
file | pzdrive1_ABglobal.c |
Driver program for pzgssvx_ABglobal example. | |
file | pzdrive2.c |
Driver program for PZGSSVX example. | |
file | pzdrive2_ABglobal.c |
Driver program for pzgssvx_ABglobal example. | |
file | pzdrive3.c |
Driver program for PZGSSVX example. | |
file | pzdrive3_ABglobal.c |
Driver program for pzgssvx_ABglobal example. | |
file | pzdrive3d.c |
Driver program for PZGSSVX3D example. | |
file | pzdrive3d1.c |
Driver program for PZGSSVX3D example. | |
file | pzdrive3d2.c |
Driver program for PZGSSVX3D example. | |
file | pzdrive3d3.c |
Driver program for PZGSSVX3D example. | |
file | pzdrive4.c |
This example illustrates how to divide up the processes into subgroups. | |
file | pzdrive4_ABglobal.c |
This example illustrates how to divide up the processes into subgroups. | |
file | pzdrive_ABglobal.c |
Driver program for pzgssvx_ABglobal example. | |
file | pzdrive_spawn.c |
Driver program for PZGSSVX example. | |
file | pzgsmv.c |
file | pzgstrs_Bglobal_Bsend.c |
Solves a system of distributed linear equations. | |
file | pzgstrs_lsum_Bsend.c |
Performs block modifications. | |
file | screate_A_x_b.c |
Read the matrix from data file. | |
file | screate_matrix.c |
Read the matrix from data file. | |
file | screate_matrix3d.c |
Read the matrix from data file. | |
file | screate_matrix_perturbed.c |
Read the matrix from data file. | |
file | zcreate_matrix.c |
Read the matrix from data file. | |
file | zcreate_matrix3d.c |
Read the matrix from data file. | |
file | zcreate_matrix_perturbed.c |
Read the matrix from data file. | |
file | zlook_ahead_update.c |
file | zreadhb.c |
Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format. | |
file | zreadtriple.c |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Directories | |
directory | specializations |
-Files | |
file | block_adjacent_difference.cuh |
file | block_discontinuity.cuh |
file | block_exchange.cuh |
file | block_histogram.cuh |
file | block_load.cuh |
file | block_radix_rank.cuh |
file | block_radix_sort.cuh |
file | block_raking_layout.cuh |
file | block_reduce.cuh |
file | block_scan.cuh |
file | block_shuffle.cuh |
file | block_store.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | block_histogram_atomic.cuh |
file | block_histogram_sort.cuh |
file | block_reduce_raking.cuh |
file | block_reduce_raking_commutative_only.cuh |
file | block_reduce_warp_reductions.cuh |
file | block_scan_raking.cuh |
file | block_scan_warp_scans.cuh |
file | block_scan_warp_scans2.cuh |
file | block_scan_warp_scans3.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | grid_barrier.cuh |
file | grid_even_share.cuh |
file | grid_mapping.cuh |
file | grid_queue.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | dcreate_matrix.c |
Read the matrix from data file. | |
file | pdcompute_resid.c |
Test for small residual. | |
file | pdtest.c |
Driver program for testing PDGSSVX. | |
file | pzcompute_resid.c |
Test for small residual. | |
file | pztest.c |
Driver program for testing PZGSSVX. | |
file | zcreate_matrix.c |
Read the matrix from data file. | |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | c2f_dcreate_matrix_x_b.c |
Read the matrix from data file, then distribute it in a distributed CSR format. | |
file | c2f_zcreate_matrix_x_b.c |
Read the matrix from data file, then distribute it in a distributed CSR format. | |
file | c_fortran_pdgssvx_ABglobal.c |
file | c_fortran_slugrid.c |
file | dcreate_dist_matrix.c |
Distribute the input matrix in a distributed compressed row format. | |
file | dhbcode1.f90 |
Fortran code for reading a sparse matrix in Harwell-Boeing format. | |
file | f_pddrive_ABglobal.f |
file | f_pddrive_old.f90 |
file | sp_ienv.c |
Chooses machine-dependent parameters for the local environment. | |
file | superlu_c2f_dwrap.c |
file | superlu_c2f_wrap.c |
file | superlu_c2f_zwrap.c |
file | superlu_mod.f90 |
This module contains Fortran-side wrappers for the SuperLU get/set functions. | |
file | superlupara.f90 |
This module contains some parameters used in SuperLU for Fortran90 users. | |
file | zcreate_dist_matrix.c |
Distribute the input matrix in a distributed compressed row format. | |
file | zhbcode1.f90 |
Fortran code for reading a sparse matrix in Harwell-Boeing format. | |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | anc25d.cpp |
file | anc25d.hpp [code] |
file | commWrapper.cpp |
file | commWrapper.hpp [code] |
file | dAncestorFactor.cpp |
file | dsparseTreeFactor_summit.cpp |
file | dsparseTreeFactorGPU.cpp |
file | l_panels.cpp |
file | lu_common.hpp [code] |
file | LUgpuCHandle_interface.cpp |
file | lupanels.cpp |
file | lupanels.hpp [code] |
file | lupanels_comm3d.cpp |
file | lupanels_GPU.cpp |
file | lupanels_GPU.cuh |
file | lupanelsComm3dGPU.cpp |
file | pdgstrf3d_summit.cpp |
file | schurCompUpdate.cu |
file | superlu_summit.h [code] |
file | u_panels.cpp |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | thread_load.cuh |
file | thread_operators.cuh |
file | thread_reduce.cuh |
file | thread_scan.cuh |
file | thread_search.cuh |
file | thread_store.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | arg_index_input_iterator.cuh |
file | cache_modified_input_iterator.cuh |
file | cache_modified_output_iterator.cuh |
file | constant_input_iterator.cuh |
file | counting_input_iterator.cuh |
file | discard_output_iterator.cuh |
file | tex_obj_input_iterator.cuh |
file | tex_ref_input_iterator.cuh |
file | transform_input_iterator.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Directories | |
directory | dispatch |
-Files | |
file | device_histogram.cuh |
file | device_partition.cuh |
file | device_radix_sort.cuh |
file | device_reduce.cuh |
file | device_run_length_encode.cuh |
file | device_scan.cuh |
file | device_segmented_radix_sort.cuh |
file | device_segmented_reduce.cuh |
file | device_select.cuh |
file | device_spmv.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Directories | |
directory | specializations |
-Files | |
file | warp_reduce.cuh |
file | warp_scan.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Files | |
file | warp_reduce_shfl.cuh |
file | warp_reduce_smem.cuh |
file | warp_scan_shfl.cuh |
file | warp_scan_smem.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Directories | |
directory | agent |
directory | block |
directory | device |
directory | grid |
directory | host |
directory | iterator |
directory | thread |
directory | warp |
-Files | |
file | cub.cuh |
file | util_allocator.cuh |
file | util_arch.cuh |
file | util_debug.cuh |
file | util_device.cuh |
file | util_macro.cuh |
file | util_namespace.cuh |
file | util_ptx.cuh |
file | util_type.cuh |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Returns the value of the one norm, the infinity norm, or the element of largest value. -More...
--Functions | |
double | dlangs_dist (char *norm, SuperMatrix *A) |
Returns the value of the one norm, the infinity norm, or the element of largest value.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-double dlangs_dist | -( | -char * | -norm, | -
- | - | SuperMatrix * | -A | -
- | ) | -- |
- Purpose - ======= - - DLANGS_dist returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - DLANGE returns the value - - DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANGE as described above. - A (input) SuperMatrix* - The M by N sparse matrix A. - - ===================================================================== -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Equilibrates a general sparse M by N matrix A. -More...
--Macros | |
#define | THRESH (0.1) |
-Functions | |
void | dlaqgs_dist (SuperMatrix *A, double *r, double *c, double rowcnd, double colcnd, double amax, char *equed) |
Equilibrates a general sparse M by N matrix A.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define THRESH (0.1) | -
void dlaqgs_dist | -( | -SuperMatrix * | -A, | -
- | - | double * | -r, | -
- | - | double * | -c, | -
- | - | double | -rowcnd, | -
- | - | double | -colcnd, | -
- | - | double | -amax, | -
- | - | char * | -equed | -
- | ) | -- |
- Purpose - ======= - - DLAQGS_dist equilibrates a general sparse M by N matrix A using the row - and column scaling factors in the vectors R and C. - - See supermatrix.h for the definition of 'SuperMatrix' structure. - - Arguments - ========= - - A (input/output) SuperMatrix* - On exit, the equilibrated matrix. See EQUED for the form of - the equilibrated matrix. The type of A can be: - Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. - - R (input) double*, dimension (A->nrow) - The row scale factors for A. - - C (input) double*, dimension (A->ncol) - The column scale factors for A. - - ROWCND (input) double - Ratio of the smallest R(i) to the largest R(i). - - COLCND (input) double - Ratio of the smallest C(i) to the largest C(i). - - AMAX (input) double - Absolute value of largest matrix entry. - - EQUED (output) char* - Specifies the form of equilibration that was done. - = 'N': No equilibration - = 'R': Row equilibration, i.e., A has been premultiplied by - diag(R). - = 'C': Column equilibration, i.e., A has been postmultiplied - by diag(C). - = 'B': Both row and column equilibration, i.e., A has been - replaced by diag(R) * A * diag(C). - - Internal Parameters - =================== - - THRESH is a threshold value used to decide if row or column scaling - should be done based on the ratio of the row or column scaling - factors. If ROWCND < THRESH, row scaling is done, and if - COLCND < THRESH, column scaling is done. - - LARGE and SMALL are threshold values used to decide if row scaling - should be done based on the absolute size of the largest matrix - element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. - - ===================================================================== -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Finds a row permutation so that the matrix has large entries on the diagonal. -More...
-#include "superlu_ddefs.h"
-Functions | |
int | mc64ad_dist (int *job, int *n, int_t *ne, int_t *ip, int_t *irn, double *a, int *num, int_t *cperm, int_t *liw, int_t *iw, int_t *ldw, double *dw, int *icntl, int *info) |
int | dldperm_dist (int job, int n, int_t nnz, int_t colptr[], int_t adjncy[], double nzval[], int_t *perm, double u[], double v[]) |
Finds a row permutation so that the matrix has large entries on the diagonal.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
int dldperm_dist | -( | -int | -job, | -
- | - | int | -n, | -
- | - | int_t | -nnz, | -
- | - | int_t | -colptr[], | -
- | - | int_t | -adjncy[], | -
- | - | double | -nzval[], | -
- | - | int_t * | -perm, | -
- | - | double | -u[], | -
- | - | double | -v[] | -
- | ) | -- |
-Purpose -======= - - DLDPERM finds a row permutation so that the matrix has large - entries on the diagonal. - -Arguments -========= - -job (input) int - Control the action. Possible values for JOB are: - = 1 : Compute a row permutation of the matrix so that the - permuted matrix has as many entries on its diagonal as - possible. The values on the diagonal are of arbitrary size. - HSL subroutine MC21A/AD is used for this. - = 2 : Compute a row permutation of the matrix so that the smallest - value on the diagonal of the permuted matrix is maximized. - = 3 : Compute a row permutation of the matrix so that the smallest - value on the diagonal of the permuted matrix is maximized. - The algorithm differs from the one used for JOB = 2 and may - have quite a different performance. - = 4 : Compute a row permutation of the matrix so that the sum - of the diagonal entries of the permuted matrix is maximized. - = 5 : Compute a row permutation of the matrix so that the product - of the diagonal entries of the permuted matrix is maximized - and vectors to scale the matrix so that the nonzero diagonal - entries of the permuted matrix are one in absolute value and - all the off-diagonal entries are less than or equal to one in - absolute value. - Restriction: 1 <= JOB <= 5. - -n (input) int - The order of the matrix. - -nnz (input) int - The number of nonzeros in the matrix. - -adjncy (input) int*, of size nnz - The adjacency structure of the matrix, which contains the row - indices of the nonzeros. - -colptr (input) int*, of size n+1 - The pointers to the beginning of each column in ADJNCY. - -nzval (input) double*, of size nnz - The nonzero values of the matrix. nzval[k] is the value of - the entry corresponding to adjncy[k]. - It is not used if job = 1. - -perm (output) int*, of size n - The permutation vector. perm[i] = j means row i in the - original matrix is in row j of the permuted matrix. 
- -u (output) double*, of size n - If job = 5, the natural logarithms of the row scaling factors. - -v (output) double*, of size n - If job = 5, the natural logarithms of the column scaling factors. - The scaled matrix B has entries b_ij = a_ij * exp(u_i + v_j). -
int mc64ad_dist | -( | -int * | -job, | -
- | - | int * | -n, | -
- | - | int_t * | -ne, | -
- | - | int_t * | -ip, | -
- | - | int_t * | -irn, | -
- | - | double * | -a, | -
- | - | int * | -num, | -
- | - | int_t * | -cperm, | -
- | - | int_t * | -liw, | -
- | - | int_t * | -iw, | -
- | - | int_t * | -ldw, | -
- | - | double * | -dw, | -
- | - | int * | -icntl, | -
- | - | int * | -info | -
- | ) | -- |
-This subroutine attempts to find a column permutation for an NxN -sparse matrix A = {a_ij} that makes the permuted matrix have N -entries on its diagonal. -If the matrix is structurally nonsingular, the subroutine optionally -returns a column permutation that maximizes the smallest element -on the diagonal, maximizes the sum of the diagonal entries, or -maximizes the product of the diagonal entries of the permuted matrix. -For the latter option, the subroutine also finds scaling factors -that may be used to scale the matrix so that the nonzero diagonal -entries of the permuted matrix are one in absolute value and all the -off-diagonal entries are less than or equal to one in absolute value. -The natural logarithms of the scaling factors u(i), i=1..N, for the -rows and v(j), j=1..N, for the columns are returned so that the -scaled matrix B = {b_ij} has entries b_ij = a_ij * EXP(u_i + v_j). -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <assert.h>
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Descriptions and declarations for structures used in GPU. -More...
-Go to the source code of this file.
--Classes | |
struct | dSCUbuf_gpu_t |
struct | dLUstruct_gpu_t |
struct | dsluGPU_t |
-Macros | |
#define | MAX_NGPU_STREAMS 32 |
#define | checkGPUErrors(val) check ( (val), #val, __FILE__, __LINE__ ) |
-Functions | |
static void | check (gpuError_t result, char const *const func, const char *const file, int const line) |
int | dsparseTreeFactor_ASYNC_GPU (sForest_t *sforest, commRequests_t **comReqss, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, dLUValSubBuf_t **LUvsbs, ddiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int ldt, dsluGPU_t *sluGPU, d2Hreduce_t *d2Hred, HyP_t *HyP, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info) |
int | dinitD2Hreduce (int next_k, d2Hreduce_t *d2Hred, int last_flag, HyP_t *HyP, dsluGPU_t *sluGPU, gridinfo_t *grid, dLUstruct_t *LUstruct, SCT_t *SCT) |
int | dreduceGPUlu (int last_flag, d2Hreduce_t *d2Hred, dsluGPU_t *sluGPU, SCT_t *SCT, gridinfo_t *grid, dLUstruct_t *LUstruct) |
int | dwaitGPUscu (int streamId, dsluGPU_t *sluGPU, SCT_t *SCT) |
int | dsendLUpanelGPU2HOST (int_t k0, d2Hreduce_t *d2Hred, dsluGPU_t *sluGPU, SuperLUStat_t *) |
int | dsendSCUdataHost2GPU (int_t streamId, int_t *lsub, int_t *usub, double *bigU, int_t bigu_send_size, int_t Remain_lbuf_send_size, dsluGPU_t *sluGPU, HyP_t *HyP) |
int | dinitSluGPU3D_t (dsluGPU_t *sluGPU, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, int_t *perm_c_supno, int_t n, int_t buffer_size, int_t bigu_size, int_t ldt, SuperLUStat_t *) |
int | dSchurCompUpdate_GPU (int_t streamId, int_t jj_cpu, int_t nub, int_t klst, int_t knsupc, int_t Rnbrow, int_t RemainBlk, int_t Remain_lbuf_send_size, int_t bigu_send_size, int_t ldu, int_t mcb, int_t buffer_size, int_t lsub_len, int_t usub_len, int_t ldt, int_t k0, dsluGPU_t *sluGPU, gridinfo_t *grid, SuperLUStat_t *) |
void | dCopyLUToGPU3D (int *isNodeInMyGrid, dLocalLU_t *A_host, dsluGPU_t *sluGPU, Glu_persist_t *Glu_persist, int_t n, gridinfo3d_t *grid3d, int_t buffer_size, int_t bigu_size, int_t ldt, SuperLUStat_t *) |
int | dreduceAllAncestors3d_GPU (int_t ilvl, int_t *myNodeCount, int_t **treePerm, dLUValSubBuf_t *LUvsb, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, dsluGPU_t *sluGPU, d2Hreduce_t *d2Hred, factStat_t *factStat, HyP_t *HyP, SCT_t *SCT, SuperLUStat_t *) |
void | dsyncAllfunCallStreams (dsluGPU_t *sluGPU, SCT_t *SCT) |
int | dfree_LUstruct_gpu (dsluGPU_t *sluGPU, SuperLUStat_t *) |
void | dPrint_matrix (char *desc, int_t m, int_t n, double *dA, int_t lda) |
Descriptions and declarations for structures used in GPU.
---- Distributed SuperLU routine (version 7.2) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley, -Georgia Institute of Technology, Oak Ridge National Laboratory -March 14, 2021 version 7.0.0 - -Last update: December 12, 2021 v7.2.0 -
#define checkGPUErrors | -( | -- | val | ) | -check ( (val), #val, __FILE__, __LINE__ ) | -
#define MAX_NGPU_STREAMS 32 | -
-
|
- -static | -
void dCopyLUToGPU3D | -( | -int * | -isNodeInMyGrid, | -
- | - | dLocalLU_t * | -A_host, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | int_t | -n, | -
- | - | gridinfo3d_t * | -grid3d, | -
- | - | int_t | -buffer_size, | -
- | - | int_t | -bigu_size, | -
- | - | int_t | -ldt, | -
- | - | SuperLUStat_t * | -- |
- | ) | -- |
int dfree_LUstruct_gpu | -( | -dsluGPU_t * | -sluGPU, | -
- | - | SuperLUStat_t * | -- |
- | ) | -- |
int dinitD2Hreduce | -( | -int | -next_k, | -
- | - | d2Hreduce_t * | -d2Hred, | -
- | - | int | -last_flag, | -
- | - | HyP_t * | -HyP, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | gridinfo_t * | -grid, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
int dinitSluGPU3D_t | -( | -dsluGPU_t * | -sluGPU, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d, | -
- | - | int_t * | -perm_c_supno, | -
- | - | int_t | -n, | -
- | - | int_t | -buffer_size, | -
- | - | int_t | -bigu_size, | -
- | - | int_t | -ldt, | -
- | - | SuperLUStat_t * | -- |
- | ) | -- |
void dPrint_matrix | -( | -char * | -desc, | -
- | - | int_t | -m, | -
- | - | int_t | -n, | -
- | - | double * | -dA, | -
- | - | int_t | -lda | -
- | ) | -- |
int dreduceAllAncestors3d_GPU | -( | -int_t | -ilvl, | -
- | - | int_t * | -myNodeCount, | -
- | - | int_t ** | -treePerm, | -
- | - | dLUValSubBuf_t * | -LUvsb, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | d2Hreduce_t * | -d2Hred, | -
- | - | factStat_t * | -factStat, | -
- | - | HyP_t * | -HyP, | -
- | - | SCT_t * | -SCT, | -
- | - | SuperLUStat_t * | -- |
- | ) | -- |
int dreduceGPUlu | -( | -int | -last_flag, | -
- | - | d2Hreduce_t * | -d2Hred, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | SCT_t * | -SCT, | -
- | - | gridinfo_t * | -grid, | -
- | - | dLUstruct_t * | -LUstruct | -
- | ) | -- |
int dSchurCompUpdate_GPU | -( | -int_t | -streamId, | -
- | - | int_t | -jj_cpu, | -
- | - | int_t | -nub, | -
- | - | int_t | -klst, | -
- | - | int_t | -knsupc, | -
- | - | int_t | -Rnbrow, | -
- | - | int_t | -RemainBlk, | -
- | - | int_t | -Remain_lbuf_send_size, | -
- | - | int_t | -bigu_send_size, | -
- | - | int_t | -ldu, | -
- | - | int_t | -mcb, | -
- | - | int_t | -buffer_size, | -
- | - | int_t | -lsub_len, | -
- | - | int_t | -usub_len, | -
- | - | int_t | -ldt, | -
- | - | int_t | -k0, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | gridinfo_t * | -grid, | -
- | - | SuperLUStat_t * | -- |
- | ) | -- |
int dsendLUpanelGPU2HOST | -( | -int_t | -k0, | -
- | - | d2Hreduce_t * | -d2Hred, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | SuperLUStat_t * | -- |
- | ) | -- |
int dsendSCUdataHost2GPU | -( | -int_t | -streamId, | -
- | - | int_t * | -lsub, | -
- | - | int_t * | -usub, | -
- | - | double * | -bigU, | -
- | - | int_t | -bigu_send_size, | -
- | - | int_t | -Remain_lbuf_send_size, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | HyP_t * | -HyP | -
- | ) | -- |
int dsparseTreeFactor_ASYNC_GPU | -( | -sForest_t * | -sforest, | -
- | - | commRequests_t ** | -comReqss, | -
- | - | dscuBufs_t * | -scuBufs, | -
- | - | packLUInfo_t * | -packLUInfo, | -
- | - | msgs_t ** | -msgss, | -
- | - | dLUValSubBuf_t ** | -LUvsbs, | -
- | - | ddiagFactBufs_t ** | -dFBufs, | -
- | - | factStat_t * | -factStat, | -
- | - | factNodelists_t * | -fNlists, | -
- | - | gEtreeInfo_t * | -gEtreeInfo, | -
- | - | superlu_dist_options_t * | -options, | -
- | - | int_t * | -gIperm_c_supno, | -
- | - | int | -ldt, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | d2Hreduce_t * | -d2Hred, | -
- | - | HyP_t * | -HyP, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | double | -thresh, | -
- | - | SCT_t * | -SCT, | -
- | - | int | -tag_ub, | -
- | - | int * | -info | -
- | ) | -- |
void dsyncAllfunCallStreams | -( | -dsluGPU_t * | -sluGPU, | -
- | - | SCT_t * | -SCT | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
-Functions | |
double | dmach_dist (char *cmach) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-double dmach_dist | -( | -char * | -cmach | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Memory utilities. -More...
-#include "superlu_ddefs.h"
-Functions | |
void * | duser_malloc_dist (int_t bytes, int_t which_end) |
void | duser_free_dist (int_t bytes, int_t which_end) |
int_t | dQuerySpace_dist (int_t n, dLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, superlu_dist_mem_usage_t *mem_usage) |
void | dallocateA_dist (int_t n, int_t nnz, double **a, int_t **asub, int_t **xa) |
double * | doubleMalloc_dist (int_t n) |
double * | doubleCalloc_dist (int_t n) |
double | dgetLUMem (int_t nodeId, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
double | dmemForest (sForest_t *sforest, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
void | d3D_printMemUse (dtrf3Dpartition_t *trf3Dpartition, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
-Variables | |
SuperLU_LU_stack_t | stack |
Memory utilities.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 4.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -October 1, 2014 -
void d3D_printMemUse | -( | -dtrf3Dpartition_t * | -trf3Dpartition, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d | -
- | ) | -- |
void dallocateA_dist | -( | -int_t | -n, | -
- | - | int_t | -nnz, | -
- | - | double ** | -a, | -
- | - | int_t ** | -asub, | -
- | - | int_t ** | -xa | -
- | ) | -- |
double dgetLUMem | -( | -int_t | -nodeId, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d | -
- | ) | -- |
double dmemForest | -( | -sForest_t * | -sforest, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d | -
- | ) | -- |
double* doubleCalloc_dist | -( | -int_t | -n | ) | -- |
double* doubleMalloc_dist | -( | -int_t | -n | ) | -- |
int_t dQuerySpace_dist | -( | -int_t | -n, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo_t * | -grid, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | superlu_dist_mem_usage_t * | -mem_usage | -
- | ) | -- |
-mem_usage consists of the following fields:
-
|
- -extern | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Level 2 BLAS operations: solves and matvec, written in C. -More...
--Functions | |
void | dlsolve (int ldm, int ncol, double *M, double *rhs) |
void | dusolve (int ldm, int ncol, double *M, double *rhs) |
void | dmatvec (int ldm, int nrow, int ncol, double *M, double *vec, double *Mxvec) |
Level 2 BLAS operations: solves and matvec, written in C.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- SuperLU routine (version 2.0) -- -Univ. of California Berkeley, Xerox Palo Alto Research Center, -and Lawrence Berkeley National Lab. -November 15, 1997 -
void dlsolve | -( | -int | -ldm, | -
- | - | int | -ncol, | -
- | - | double * | -M, | -
- | - | double * | -rhs | -
- | ) | -- |
-Solves a dense UNIT lower triangular system. The unit lower -triangular matrix is stored in a 2D array M(1:ldm,1:ncol). -The solution will be returned in the rhs vector. -
void dmatvec | -( | -int | -ldm, | -
- | - | int | -nrow, | -
- | - | int | -ncol, | -
- | - | double * | -M, | -
- | - | double * | -vec, | -
- | - | double * | -Mxvec | -
- | ) | -- |
-Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. -The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. -
void dusolve | -( | -int | -ldm, | -
- | - | int | -ncol, | -
- | - | double * | -M, | -
- | - | double * | -rhs | -
- | ) | -- |
-Solves a dense upper triangular system. The upper triangular matrix is -stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned -in the rhs vector. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Contributed by Francois-Henry Rouet. -More...
--Functions | |
void | dreadMM_dist (FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
static void | dreadrhs (int m, double *b) |
Contributed by Francois-Henry Rouet.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void dreadMM_dist | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
brief
-Output parameters -================= - (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of - nonzeros in columns of matrix A; (*nzval)[*] the numerical values; - column i of A is given by (*nzval)[k], k = (*colptr)[i],..., - (*colptr)[i+1]-1. -
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Read a matrix stored in Rutherford-Boeing format. -More...
--Functions | |
static int | DumpLine (FILE *fp) |
Eat up the rest of the current line. More... | |
static int | ParseIntFormat (char *buf, int_t *num, int_t *size) |
static int | ParseFloatFormat (char *buf, int_t *num, int_t *size) |
static int | ReadVector (FILE *fp, int_t n, int_t *where, int_t perline, int_t persize) |
static int | dReadValues (FILE *fp, int_t n, double *destination, int_t perline, int_t persize) |
static void | FormFullA (int_t n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
void | dreadrb_dist (int iam, FILE *fp, int_t *nrow, int_t *ncol, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
Read a matrix stored in Rutherford-Boeing format.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 4.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -August 15, 2014 - -
Purpose
-Read a DOUBLE PRECISION matrix stored in Rutherford-Boeing format as described below.
-Line 1 (A72, A8) Col. 1 - 72 Title (TITLE) Col. 73 - 80 Matrix name / identifier (MTRXID)
-Line 2 (I14, 3(1X, I13)) Col. 1 - 14 Total number of lines excluding header (TOTCRD) Col. 16 - 28 Number of lines for pointers (PTRCRD) Col. 30 - 42 Number of lines for row (or variable) indices (INDCRD) Col. 44 - 56 Number of lines for numerical values (VALCRD)
-Line 3 (A3, 11X, 4(1X, I13)) Col. 1 - 3 Matrix type (see below) (MXTYPE) Col. 15 - 28 Compressed Column: Number of rows (NROW) Elemental: Largest integer used to index variable (MVAR) Col. 30 - 42 Compressed Column: Number of columns (NCOL) Elemental: Number of element matrices (NELT) Col. 44 - 56 Compressed Column: Number of entries (NNZERO) Elemental: Number of variable indices (NVARIX) Col. 58 - 70 Compressed Column: Unused, explicitly zero Elemental: Number of elemental matrix entries (NELTVL)
-Line 4 (2A16, A20) Col. 1 - 16 Fortran format for pointers (PTRFMT) Col. 17 - 32 Fortran format for row (or variable) indices (INDFMT) Col. 33 - 52 Fortran format for numerical values of coefficient matrix (VALFMT) (blank in the case of matrix patterns)
-The three character type field on line 3 describes the matrix type. The following table lists the permitted values for each of the three characters. As an example of the type field, RSA denotes that the matrix is real, symmetric, and assembled.
-First Character: R Real matrix C Complex matrix I Integer matrix P Pattern only (no numerical values supplied) Q Pattern only (numerical values supplied in associated auxiliary value file)
-Second Character: S Symmetric U Unsymmetric H Hermitian Z Skew symmetric R Rectangular
-Third Character: A Compressed column form E Elemental form
-void dreadrb_dist | -( | -int | -iam, | -
- | - | FILE * | -fp, | -
- | - | int_t * | -nrow, | -
- | - | int_t * | -ncol, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
-
|
- -static | -
-
|
- -static | -
Eat up the rest of the current line.
-
-
|
- -static | -
-On input, nonz/nzval/rowind/colptr represents lower part of a symmetric -matrix. On exit, it represents the full matrix with lower and upper parts. -
-
|
- -static | -
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | dreadtriple_noheader (FILE *fp, int_t *m, int_t *n, int_t *nonz, double **nzval, int_t **rowind, int_t **colptr) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-void dreadtriple_noheader | -( | -FILE * | -fp, | -
- | - | int_t * | -m, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nonz, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr | -
- | ) | -- |
brief
-Output parameters -================= - (nzval, rowind, colptr): (*rowind)[*] contains the row subscripts of - nonzeros in columns of matrix A; (*nzval)[*] the numerical values; - column i of A is given by (*nzval)[k], k = (*colptr)[i],..., - (*colptr)[i+1]-1. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
-Macros | |
#define | ISORT |
#define | SCATTER_U_CPU scatter_u |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define ISORT | -
#define SCATTER_U_CPU scatter_u | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Sparse BLAS 2, using some dense BLAS 2 operations. -More...
-#include "superlu_ddefs.h"
-Functions | |
void | dusolve (int, int, double *, double *) |
void | dlsolve (int, int, double *, double *) |
void | dmatvec (int, int, int, double *, double *, double *) |
int | sp_dtrsv_dist (char *uplo, char *trans, char *diag, SuperMatrix *L, SuperMatrix *U, double *x, int *info) |
int | sp_dgemv_dist (char *trans, double alpha, SuperMatrix *A, double *x, int incx, double beta, double *y, int incy) |
SpGEMV. More... | |
Sparse BLAS 2, using some dense BLAS 2 operations.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
void dlsolve | -( | -int | -ldm, | -
- | - | int | -ncol, | -
- | - | double * | -M, | -
- | - | double * | -rhs | -
- | ) | -- |
-Solves a dense UNIT lower triangular system. The unit lower -triangular matrix is stored in a 2D array M(1:ldm,1:ncol). -The solution will be returned in the rhs vector. -
void dmatvec | -( | -int | -ldm, | -
- | - | int | -nrow, | -
- | - | int | -ncol, | -
- | - | double * | -M, | -
- | - | double * | -vec, | -
- | - | double * | -Mxvec | -
- | ) | -- |
-Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. -The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. -
void dusolve | -( | -int | -ldm, | -
- | - | int | -ncol, | -
- | - | double * | -M, | -
- | - | double * | -rhs | -
- | ) | -- |
-Solves a dense upper triangular system. The upper triangular matrix is -stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned -in the rhs vector. -
int sp_dgemv_dist | -( | -char * | -trans, | -
- | - | double | -alpha, | -
- | - | SuperMatrix * | -A, | -
- | - | double * | -x, | -
- | - | int | -incx, | -
- | - | double | -beta, | -
- | - | double * | -y, | -
- | - | int | -incy | -
- | ) | -- |
SpGEMV.
-- Purpose - ======= - - sp_dgemv_dist() performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - where alpha and beta are scalars, x and y are vectors and A is a - sparse A->nrow by A->ncol matrix. - - Parameters - ========== - - TRANS - (input) char* - On entry, TRANS specifies the operation to be performed as - follows: - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - ALPHA - (input) double - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. - In the future, more general A can be handled. - - X - (input) double*, array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - - INCX - (input) int - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - - BETA - (input) double - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - - Y - (output) double*, array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - (input) int - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - - ==== Sparse Level 2 Blas routine. -
int sp_dtrsv_dist | -( | -char * | -uplo, | -
- | - | char * | -trans, | -
- | - | char * | -diag, | -
- | - | SuperMatrix * | -L, | -
- | - | SuperMatrix * | -U, | -
- | - | double * | -x, | -
- | - | int * | -info | -
- | ) | -- |
- Purpose - ======= - - sp_dtrsv_dist() solves one of the systems of equations - A*x = b, or A'*x = b, - where b and x are n element vectors and A is a sparse unit , or - non-unit, upper or lower triangular matrix. - No test for singularity or near-singularity is included in this - routine. Such tests must be performed before calling this routine. - - Parameters - ========== - - uplo - (input) char* - On entry, uplo specifies whether the matrix is an upper or - lower triangular matrix as follows: - uplo = 'U' or 'u' A is an upper triangular matrix. - uplo = 'L' or 'l' A is a lower triangular matrix. - - trans - (input) char* - On entry, trans specifies the equations to be solved as - follows: - trans = 'N' or 'n' A*x = b. - trans = 'T' or 't' A'*x = b. - trans = 'C' or 'c' A'*x = b. - - diag - (input) char* - On entry, diag specifies whether or not A is unit - triangular as follows: - diag = 'U' or 'u' A is assumed to be unit triangular. - diag = 'N' or 'n' A is not assumed to be unit - triangular. - - L - (input) SuperMatrix* - The factor L from the factorization Pr*A*Pc=L*U. Use - compressed row subscripts storage for supernodes, i.e., - L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. - - U - (input) SuperMatrix* - The factor U from the factorization Pr*A*Pc=L*U. - U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU. - - x - (input/output) double* - Before entry, the incremented array X must contain the n - element right-hand side vector b. On exit, X is overwritten - with the solution vector x. - - info - (output) int* - If *info = -i, the i-th argument had an illegal value. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Sparse BLAS3, using some dense BLAS3 operations. -More...
-#include "superlu_ddefs.h"
-Functions | |
int | sp_dgemm_dist (char *transa, int n, double alpha, SuperMatrix *A, double *b, int ldb, double beta, double *c, int ldc) |
Sparse BLAS3, using some dense BLAS3 operations.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 1.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -September 1, 1999 -
int sp_dgemm_dist | -( | -char * | -transa, | -
- | - | int | -n, | -
- | - | double | -alpha, | -
- | - | SuperMatrix * | -A, | -
- | - | double * | -b, | -
- | - | int | -ldb, | -
- | - | double | -beta, | -
- | - | double * | -c, | -
- | - | int | -ldc | -
- | ) | -- |
- Purpose - ======= - - sp_d performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - - Parameters - ========== - - TRANSA - (input) char* - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - TRANSA = 'N' or 'n', op( A ) = A. - TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - Unchanged on exit. - - TRANSB - (input) char* - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - TRANSB = 'N' or 'n', op( B ) = B. - TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - Unchanged on exit. - - M - (input) int - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - (input) int - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - (input) int - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - (input) double - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. - In the future, more general A can be handled. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. 
- Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - (input) int - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least max( 1, n ). - Unchanged on exit. - - BETA - (input) double - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*B + beta*C ). - - LDC - (input) int - On entry, LDC specifies the first dimension of C as declared - in the calling (sub)program. LDC must be at least max(1,m). - Unchanged on exit. - - ==== Sparse Level 3 Blas routine. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include <cstdio>
#include "superlu_ddefs.h"
#include "lupanels.hpp"
#include "lupanels_GPU.cuh"
-Macros | |
#define | NDEBUG |
-Functions | |
int | getBufferOffset (int k0, int k1, int winSize, int winParity, int halfWin) |
#define NDEBUG | -
int getBufferOffset | -( | -int | -k0, | -
- | - | int | -k1, | -
- | - | int | -winSize, | -
- | - | int | -winParity, | -
- | - | int | -halfWin | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Performs static scheduling for the look-ahead factorization algorithm. -More...
-#include "superlu_ddefs.h"
-Functions | |
void | isort (int_t N, int_t *ARRAY1, int_t *ARRAY2) |
void | isort1 (int_t N, int_t *ARRAY) |
int | dstatic_schedule (superlu_dist_options_t *options, int m, int n, dLUstruct_t *LUstruct, gridinfo_t *grid, SuperLUStat_t *stat, int_t *perm_c_supno, int_t *iperm_c_supno, int *info) |
Performs static scheduling for the look-ahead factorization algorithm.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 4.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -August 15, 2014 - -Modified: February 20, 2020, changed to be precision-dependent. - -Reference: - -
int dstatic_schedule | -( | -superlu_dist_options_t * | -options, | -
- | - | int | -m, | -
- | - | int | -n, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo_t * | -grid, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | int_t * | -perm_c_supno, | -
- | - | int_t * | -iperm_c_supno, | -
- | - | int * | -info | -
- | ) | -- |
void isort | -( | -int_t | -N, | -
- | - | int_t * | -ARRAY1, | -
- | - | int_t * | -ARRAY2 | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "dsuperlu_gpu.cu"
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Factorization routines for the subtree using 2D process grid, with GPUs. -More...
-#include "dlustruct_gpu.h"
-Functions | |
int | dsparseTreeFactor_ASYNC_GPU (sForest_t *sforest, commRequests_t **comReqss, dscuBufs_t *scuBufs, packLUInfo_t *packLUInfo, msgs_t **msgss, dLUValSubBuf_t **LUvsbs, ddiagFactBufs_t **dFBufs, factStat_t *factStat, factNodelists_t *fNlists, gEtreeInfo_t *gEtreeInfo, superlu_dist_options_t *options, int_t *gIperm_c_supno, int ldt, dsluGPU_t *sluGPU, d2Hreduce_t *d2Hred, HyP_t *HyP, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d, SuperLUStat_t *stat, double thresh, SCT_t *SCT, int tag_ub, int *info) |
Factorization routines for the subtree using 2D process grid, with GPUs.
---- Distributed SuperLU routine (version 7.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley, -Georgia Institute of Technology, Oak Ridge National Laboratory -May 12, 2021 -
int dsparseTreeFactor_ASYNC_GPU | -( | -sForest_t * | -sforest, | -
- | - | commRequests_t ** | -comReqss, | -
- | - | dscuBufs_t * | -scuBufs, | -
- | - | packLUInfo_t * | -packLUInfo, | -
- | - | msgs_t ** | -msgss, | -
- | - | dLUValSubBuf_t ** | -LUvsbs, | -
- | - | ddiagFactBufs_t ** | -dFBufs, | -
- | - | factStat_t * | -factStat, | -
- | - | factNodelists_t * | -fNlists, | -
- | - | gEtreeInfo_t * | -gEtreeInfo, | -
- | - | superlu_dist_options_t * | -options, | -
- | - | int_t * | -gIperm_c_supno, | -
- | - | int | -ldt, | -
- | - | dsluGPU_t * | -sluGPU, | -
- | - | d2Hreduce_t * | -d2Hred, | -
- | - | HyP_t * | -HyP, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | double | -thresh, | -
- | - | SCT_t * | -SCT, | -
- | - | int | -tag_ub, | -
- | - | int * | -info | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "superlu_ddefs.h"
-Macros | |
#define | BL 32 |
#define | BL 32 |
#define | BL 32 |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-#define BL 32 | -
#define BL 32 | -
#define BL 32 | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Several matrix utilities. -More...
--Functions | |
for (j=0;j< N;++j) for(i=0 | |
if (! L->Store) | |
void | dCopy_CompRowLoc_Matrix_dist (SuperMatrix *A, SuperMatrix *B) |
void | dZero_CompRowLoc_Matrix_dist (SuperMatrix *A) |
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,..,n. More... | |
void | dScaleAddId_CompRowLoc_Matrix_dist (SuperMatrix *A, double c) |
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,j} for i,j=1,...,n and \delta_{i,j} is the Kronecker delta. More... | |
void | dScaleAdd_CompRowLoc_Matrix_dist (SuperMatrix *A, SuperMatrix *B, double c) |
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,j} + B_{i,j} for i,j=1,...,n. More... | |
void | dScalePermstructInit (const int_t m, const int_t n, dScalePermstruct_t *ScalePermstruct) |
Allocate storage in ScalePermstruct. More... | |
void | dScalePermstructFree (dScalePermstruct_t *ScalePermstruct) |
Deallocate ScalePermstruct. More... | |
int | dAllocGlu_3d (int_t n, int_t nsupers, dLUstruct_t *LUstruct) |
int | dDeAllocGlu_3d (dLUstruct_t *LUstruct) |
int | dDeAllocLlu_3d (int_t n, dLUstruct_t *LUstruct, gridinfo3d_t *grid3d) |
void | dGenXtrue_dist (int_t n, int_t nrhs, double *x, int_t ldx) |
void | dFillRHS_dist (char *trans, int_t nrhs, double *x, int_t ldx, SuperMatrix *A, double *rhs, int_t ldb) |
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's. More... | |
void | dfill_dist (double *a, int_t alen, double dval) |
Fills a double precision array with a given value. More... | |
void | dinf_norm_error_dist (int_t n, int_t nrhs, double *x, int_t ldx, double *xtrue, int_t ldxtrue, gridinfo_t *grid) |
Check the inf-norm of the error vector. More... | |
void | Printdouble5 (char *name, int_t len, double *x) |
int | file_Printdouble5 (FILE *fp, char *name, int_t len, double *x) |
void | dPrintLblocks (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu) |
Print the blocks in the factored matrix L. More... | |
void | dZeroLblocks (int iam, int n, gridinfo_t *grid, dLUstruct_t *LUstruct) |
Sets all entries of matrix L to zero. More... | |
void | dDumpLblocks (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu) |
Dump the factored matrix L using MATLAB triplet format. More... | |
void | dComputeLevelsets (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu, int_t *levels) |
void | dGenCOOLblocks (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu, int_t **cooRows, int_t **cooCols, double **cooVals, int_t *n, int_t *nnzL) |
void | dGenCSCLblocks (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu, double **nzval, int_t **rowind, int_t **colptr, int_t *n, int_t *nnzL) |
void | dGenCSRLblocks (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu, double **nzval, int_t **colind, int_t **rowptr, int_t *n, int_t *nnzL) |
void | dPrintUblocks (int iam, int_t nsupers, gridinfo_t *grid, Glu_persist_t *Glu_persist, dLocalLU_t *Llu) |
Print the blocks in the factored matrix U. More... | |
void | dZeroUblocks (int iam, int n, gridinfo_t *grid, dLUstruct_t *LUstruct) |
Sets all entries of matrix U to zero. More... | |
int | dprint_gsmv_comm (FILE *fp, int_t m_loc, pdgsmv_comm_t *gsmv_comm, gridinfo_t *grid) |
void | dGenXtrueRHS (int nrhs, SuperMatrix *A, Glu_persist_t *Glu_persist, gridinfo_t *grid, double **xact, int *ldx, double **b, int *ldb) |
-Variables | |
int | i |
int | j |
i< M;++i) Y[i+j *ldy]=X[i+j *ldx];}voiddCreate_SuperNode_Matrix_dist(SuperMatrix *L, int_t m, int_t n, int_t nnz, double *nzval, int_t *nzval_colptr, int_t *rowind, int_t *rowind_colptr, int_t *col_to_sup, int_t *sup_to_col, Stype_t stype, Dtype_t dtype, Mtype_t mtype){ SCformat *Lstore;L-> | Stype = stype |
L | Dtype = dtype |
L | Mtype = mtype |
L | nrow = m |
L | ncol = n |
L | Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ) |
Several matrix utilities.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-- -- Distributed SuperLU routine (version 7.1.0) -- - Lawrence Berkeley National Lab, Univ. of California Berkeley. - March 15, 2003 - October 5, 2021 - */ - - - - -void -dCreate_CompCol_Matrix_dist(SuperMatrix *A, int_t m, int_t n, int_t nnz, - double *nzval, int_t *rowind, int_t *colptr, - Stype_t stype, Dtype_t dtype, Mtype_t mtype) -{ - NCformat *Astore; - - A->Stype = stype; - A->Dtype = dtype; - A->Mtype = mtype; - A->nrow = m; - A->ncol = n; - A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); - if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); - Astore = (NCformat *) A->Store; - Astore->nnz = nnz; - Astore->nzval = nzval; - Astore->rowind = rowind; - Astore->colptr = colptr; -} - -void -dCreate_CompRowLoc_Matrix_dist(SuperMatrix *A, int_t m, int_t n, - int_t nnz_loc, int_t m_loc, int_t fst_row, - double *nzval, int_t *colind, int_t *rowptr, - Stype_t stype, Dtype_t dtype, Mtype_t mtype) -{ - NRformat_loc *Astore; - - A->Stype = stype; - A->Dtype = dtype; - A->Mtype = mtype; - A->nrow = m; - A->ncol = n; - A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat_loc) ); - if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); - Astore = (NRformat_loc *) A->Store; - Astore->nnz_loc = nnz_loc; - Astore->fst_row = fst_row; - Astore->m_loc = m_loc; - Astore->nzval = nzval; - Astore->colind = colind; - Astore->rowptr = rowptr; -} - -/*! -Convert a row compressed storage into a column compressed storage. - */ -void -dCompRow_to_CompCol_dist(int_t m, int_t n, int_t nnz, - double *a, int_t *colind, int_t *rowptr, - double **at, int_t **rowind, int_t **colptr) -{ - register int i, j, col, relpos; - int_t *marker; - - /* Allocate storage for another copy of the matrix. 
*/ - *at = (double *) doubleMalloc_dist(nnz); - *rowind = intMalloc_dist(nnz); - *colptr = intMalloc_dist(n+1); - marker = intCalloc_dist(n); - - /* Get counts of each column of A, and set up column pointers */ - for (i = 0; i < m; ++i) - for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]]; - (*colptr)[0] = 0; - for (j = 0; j < n; ++j) { - (*colptr)[j+1] = (*colptr)[j] + marker[j]; - marker[j] = (*colptr)[j]; - } - - /* Transfer the matrix into the compressed column storage. */ - for (i = 0; i < m; ++i) { - for (j = rowptr[i]; j < rowptr[i+1]; ++j) { - col = colind[j]; - relpos = marker[col]; - (*rowind)[relpos] = i; - (*at)[relpos] = a[j]; - ++marker[col]; - } - } - - SUPERLU_FREE(marker); -} - -/*! -Copy matrix A into matrix B. */ -void -dCopy_CompCol_Matrix_dist(SuperMatrix *A, SuperMatrix *B) -{ - NCformat *Astore, *Bstore; - int ncol, nnz, i; - - B->Stype = A->Stype; - B->Dtype = A->Dtype; - B->Mtype = A->Mtype; - B->nrow = A->nrow;; - B->ncol = ncol = A->ncol; - Astore = (NCformat *) A->Store; - Bstore = (NCformat *) B->Store; - Bstore->nnz = nnz = Astore->nnz; - for (i = 0; i < nnz; ++i) - ((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i]; - for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i]; - for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i]; -} - - -void dPrint_CompCol_Matrix_dist(SuperMatrix *A) -{ - NCformat *Astore; - register int i; - double *dp; - - printf("\nCompCol matrix: "); - printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (NCformat *) A->Store; - printf("nrow %lld, ncol %lld, nnz %lld\n", (long long) A->nrow, - (long long) A->ncol, (long long) Astore->nnz); - if ( (dp = (double *) Astore->nzval) != NULL ) { - printf("nzval:\n"); - for (i = 0; i < Astore->nnz; ++i) printf("%f ", dp[i]); - } - printf("\nrowind:\n"); - for (i = 0; i < Astore->nnz; ++i) - printf("%lld ", (long long) Astore->rowind[i]); - printf("\ncolptr:\n"); - for (i = 0; i <= A->ncol; ++i) - 
printf("%lld ", (long long) Astore->colptr[i]); - printf("\nend CompCol matrix.\n"); -} - -void dPrint_Dense_Matrix_dist(SuperMatrix *A) -{ - DNformat *Astore; - register int i; - double *dp; - - printf("\nDense matrix: "); - printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (DNformat *) A->Store; - dp = (double *) Astore->nzval; - printf("nrow %lld, ncol %lld, lda %lld\n", - (long long) A->nrow, (long long) A->ncol, (long long) Astore->lda); - printf("\nnzval: "); - for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); - printf("\nend Dense matrix.\n"); -} - -int dPrint_CompRowLoc_Matrix_dist(SuperMatrix *A) -{ - NRformat_loc *Astore; - int_t nnz_loc, m_loc; - double *dp; - - printf("\n==== CompRowLoc matrix: "); - printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (NRformat_loc *) A->Store; - printf("nrow %ld, ncol %ld\n", - (long int) A->nrow, (long int) A->ncol); - nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; - printf("nnz_loc %ld, m_loc %ld, fst_row %ld\n", (long int) nnz_loc, - (long int) m_loc, (long int) Astore->fst_row); - PrintInt10("rowptr", m_loc+1, Astore->rowptr); - PrintInt10("colind", nnz_loc, Astore->colind); - if ( (dp = (double *) Astore->nzval) != NULL ) - Printdouble5("nzval", nnz_loc, dp); - printf("==== end CompRowLoc matrix\n"); - return 0; -} - -int file_dPrint_CompRowLoc_Matrix_dist(FILE *fp, SuperMatrix *A) -{ - NRformat_loc *Astore; - int_t nnz_loc, m_loc; - double *dp; - - fprintf(fp, "\n==== CompRowLoc matrix: "); - fprintf(fp, "Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (NRformat_loc *) A->Store; - fprintf(fp, "nrow %ld, ncol %ld\n", (long int) A->nrow, (long int) A->ncol); - nnz_loc = Astore->nnz_loc; m_loc = Astore->m_loc; - fprintf(fp, "nnz_loc %ld, m_loc %ld, fst_row %ld\n", (long int) nnz_loc, - (long int) m_loc, (long int) Astore->fst_row); - file_PrintInt10(fp, "rowptr", m_loc+1, Astore->rowptr); - file_PrintInt10(fp, "colind", 
nnz_loc, Astore->colind); - if ( (dp = (double *) Astore->nzval) != NULL ) - file_Printdouble5(fp, "nzval", nnz_loc, dp); - fprintf(fp, "==== end CompRowLoc matrix\n"); - return 0; -} - -void -dCreate_Dense_Matrix_dist(SuperMatrix *X, int_t m, int_t n, double *x, - int_t ldx, Stype_t stype, Dtype_t dtype, - Mtype_t mtype) -{ - DNformat *Xstore; - - X->Stype = stype; - X->Dtype = dtype; - X->Mtype = mtype; - X->nrow = m; - X->ncol = n; - X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); - if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store"); - Xstore = (DNformat *) X->Store; - Xstore->lda = ldx; - Xstore->nzval = (double *) x; -} - -void -dCopy_Dense_Matrix_dist(int_t M, int_t N, double *X, int_t ldx, - double *Y, int_t ldy) -{ -/*! - - -- Purpose - ======= - - Copies a two-dimensional matrix X to another matrix Y. -
int dAllocGlu_3d | -( | -int_t | -n, | -
- | - | int_t | -nsupers, | -
- | - | dLUstruct_t * | -LUstruct | -
- | ) | -- |
void dComputeLevelsets | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu, | -
- | - | int_t * | -levels | -
- | ) | -- |
Compute the level sets in the L factor.
-void dCopy_CompRowLoc_Matrix_dist | -( | -SuperMatrix * | -A, | -
- | - | SuperMatrix * | -B | -
- | ) | -- |
int dDeAllocGlu_3d | -( | -dLUstruct_t * | -LUstruct | ) | -- |
int dDeAllocLlu_3d | -( | -int_t | -n, | -
- | - | dLUstruct_t * | -LUstruct, | -
- | - | gridinfo3d_t * | -grid3d | -
- | ) | -- |
void dDumpLblocks | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu | -
- | ) | -- |
Dump the factored matrix L using MATLAB triplet format.
-void dfill_dist | -( | -double * | -a, | -
- | - | int_t | -alen, | -
- | - | double | -dval | -
- | ) | -- |
Fills a double precision array with a given value.
- -void dFillRHS_dist | -( | -char * | -trans, | -
- | - | int_t | -nrhs, | -
- | - | double * | -x, | -
- | - | int_t | -ldx, | -
- | - | SuperMatrix * | -A, | -
- | - | double * | -rhs, | -
- | - | int_t | -ldb | -
- | ) | -- |
Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's.
-void dGenCOOLblocks | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu, | -
- | - | int_t ** | -cooRows, | -
- | - | int_t ** | -cooCols, | -
- | - | double ** | -cooVals, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nnzL | -
- | ) | -- |
Dump the factored matrix L using MATLAB triplet format.
- -void dGenCSCLblocks | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -rowind, | -
- | - | int_t ** | -colptr, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nnzL | -
- | ) | -- |
Dump the factored matrix L using CSC format.
-void dGenCSRLblocks | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu, | -
- | - | double ** | -nzval, | -
- | - | int_t ** | -colind, | -
- | - | int_t ** | -rowptr, | -
- | - | int_t * | -n, | -
- | - | int_t * | -nnzL | -
- | ) | -- |
Dump the factored matrix L using CSR format.
-void dGenXtrue_dist | -( | -int_t | -n, | -
- | - | int_t | -nrhs, | -
- | - | double * | -x, | -
- | - | int_t | -ldx | -
- | ) | -- |
void dGenXtrueRHS | -( | -int | -nrhs, | -
- | - | SuperMatrix * | -A, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | gridinfo_t * | -grid, | -
- | - | double ** | -xact, | -
- | - | int * | -ldx, | -
- | - | double ** | -b, | -
- | - | int * | -ldb | -
- | ) | -- |
void dinf_norm_error_dist | -( | -int_t | -n, | -
- | - | int_t | -nrhs, | -
- | - | double * | -x, | -
- | - | int_t | -ldx, | -
- | - | double * | -xtrue, | -
- | - | int_t | -ldxtrue, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
Check the inf-norm of the error vector.
-int dprint_gsmv_comm | -( | -FILE * | -fp, | -
- | - | int_t | -m_loc, | -
- | - | pdgsmv_comm_t * | -gsmv_comm, | -
- | - | gridinfo_t * | -grid | -
- | ) | -- |
void dPrintLblocks | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu | -
- | ) | -- |
Print the blocks in the factored matrix L.
-void dPrintUblocks | -( | -int | -iam, | -
- | - | int_t | -nsupers, | -
- | - | gridinfo_t * | -grid, | -
- | - | Glu_persist_t * | -Glu_persist, | -
- | - | dLocalLU_t * | -Llu | -
- | ) | -- |
Print the blocks in the factored matrix U.
-void dScaleAdd_CompRowLoc_Matrix_dist | -( | -SuperMatrix * | -A, | -
- | - | SuperMatrix * | -B, | -
- | - | double | -c | -
- | ) | -- |
Scale and add: adds a scalar multiple of one matrix to another. A_{i,j} = c * A_{i,j} + B_{i,j} for i,j=1,...,n.
- -void dScaleAddId_CompRowLoc_Matrix_dist | -( | -SuperMatrix * | -A, | -
- | - | double | -c | -
- | ) | -- |
Scale and add I: scales a matrix and adds an identity. A_{i,j} = c * A_{i,j} + \delta_{i,j} for i,j=1,...,n and \delta_{i,j} is the Kronecker delta.
-void dScalePermstructFree | -( | -dScalePermstruct_t * | -ScalePermstruct | ) | -- |
Deallocate ScalePermstruct.
-void dScalePermstructInit | -( | -const int_t | -m, | -
- | - | const int_t | -n, | -
- | - | dScalePermstruct_t * | -ScalePermstruct | -
- | ) | -- |
Allocate storage in ScalePermstruct.
-void dZero_CompRowLoc_Matrix_dist | -( | -SuperMatrix * | -A | ) | -- |
Sets all entries of a matrix to zero, A_{i,j}=0, for i,j=1,..,n.
-void dZeroLblocks | -( | -int | -iam, | -
- | - | int | -n, | -
- | - | gridinfo_t * | -grid, | -
- | - | dLUstruct_t * | -LUstruct | -
- | ) | -- |
Sets all entries of matrix L to zero.
-void dZeroUblocks | -( | -int | -iam, | -
- | - | int | -n, | -
- | - | gridinfo_t * | -grid, | -
- | - | dLUstruct_t * | -LUstruct | -
- | ) | -- |
Sets all entries of matrix U to zero.
-int file_Printdouble5 | -( | -FILE * | -fp, | -
- | - | char * | -name, | -
- | - | int_t | -len, | -
- | - | double * | -x | -
- | ) | -- |
-
|
- -pure virtual | -
if | -( | -! | -L->Store | ) | -- |
void Printdouble5 | -( | -char * | -name, | -
- | - | int_t | -len, | -
- | - | double * | -x | -
- | ) | -- |
L Dtype = dtype | -
int i | -
int j | -
L Mtype = mtype | -
L ncol = n | -
L nrow = m | -
L Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ) | -
i< M; ++i) Y[i + j*ldy] = X[i + j*ldx];}voiddCreate_SuperNode_Matrix_dist(SuperMatrix *L, int_t m, int_t n, int_t nnz, double *nzval, int_t *nzval_colptr, int_t *rowind, int_t *rowind_colptr, int_t *col_to_sup, int_t *sup_to_col, Stype_t stype, Dtype_t dtype, Mtype_t mtype){ SCformat *Lstore; L-> Stype = stype | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Elimination tree computation and layout routines. -More...
--Functions | |
static int_t * | mxCallocInt (int_t n) |
static void | initialize_disjoint_sets (int_t n, int_t **pp) |
static int_t | make_set (int_t i, int_t *pp) |
static int_t | link (int_t s, int_t t, int_t *pp) |
static int_t | find (int_t i, int_t *pp) |
static void | finalize_disjoint_sets (int_t *pp) |
int | sp_symetree_dist (int_t *acolst, int_t *acolend, int_t *arow, int_t n, int_t *parent) |
Symmetric elimination tree. More... | |
int | sp_coletree_dist (int_t *acolst, int_t *acolend, int_t *arow, int_t nr, int_t nc, int_t *parent) |
Nonsymmetric elimination tree. More... | |
static void | etdfs (int_t v, int_t first_kid[], int_t next_kid[], int_t post[], int_t *postnum) |
static void | nr_etdfs (int_t n, int_t *parent, int_t *first_kid, int_t *next_kid, int_t *post, int_t postnum) |
int_t * | TreePostorder_dist (int_t n, int_t *parent) |
-Variables | |
static int_t * | first_kid |
Depth-first search from vertex. More... | |
static int_t * | next_kid |
static int_t * | post |
static int_t | postnum |
Elimination tree computation and layout routines.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-- Implementation of disjoint set union routines. - Elements are integers in 0..n-1, and the - names of the sets themselves are of type int. - - Calls are: - initialize_disjoint_sets (n) initial call. - s = make_set (i) returns a set containing only i. - s = link (t, u) returns s = t union u, destroying t and u. - s = find (i) return name of set containing i. - finalize_disjoint_sets final call. - - This implementation uses path compression but not weighted union. - See Tarjan's book for details. - John Gilbert, CMI, 1987. - - Implemented path-halving by XL 7/5/95. -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
int sp_coletree_dist | -( | -int_t * | -acolst, | -
- | - | int_t * | -acolend, | -
- | - | int_t * | -arow, | -
- | - | int_t | -nr, | -
- | - | int_t | -nc, | -
- | - | int_t * | -parent | -
- | ) | -- |
Nonsymmetric elimination tree.
-- Find the elimination tree for A'*A. - This uses something similar to Liu's algorithm. - It runs in time O(nz(A)*log n) and does not form A'*A. - - Input: - Sparse matrix A. Numeric values are ignored, so any - explicit zeros are treated as nonzero. - Output: - Integer array of parents representing the elimination - tree of the symbolic product A'*A. Each vertex is a - column of A, and nc means a root of the elimination forest. - - John R. Gilbert, Xerox, 10 Dec 1990 - Based on code by JRG dated 1987, 1988, and 1990. -
int sp_symetree_dist | -( | -int_t * | -acolst, | -
- | - | int_t * | -acolend, | -
- | - | int_t * | -arow, | -
- | - | int_t | -n, | -
- | - | int_t * | -parent | -
- | ) | -- |
Symmetric elimination tree.
-- p = spsymetree (A); - - Find the elimination tree for symmetric matrix A. - This uses Liu's algorithm, and runs in time O(nz*log n). - - Input: - Square sparse matrix A. No check is made for symmetry; - elements below and on the diagonal are ignored. - Numeric values are ignored, so any explicit zeros are - treated as nonzero. - Output: - Integer array of parents representing the etree, with n - meaning a root of the elimination forest. - Note: - This routine uses only the upper triangle, while sparse - Cholesky (as in spchol.c) uses only the lower. Matlab's - dense Cholesky uses only the upper. This routine could - be modified to use the lower triangle either by transposing - the matrix or by traversing it by rows with auxiliary - pointer and link arrays. - - John R. Gilbert, Xerox, 10 Dec 1990 - Based on code by JRG dated 1987, 1988, and 1990. - Modified by X.S. Li, November 1999. -
-
|
- -static | -
Depth-first search from vertex.
-- q = TreePostorder_dist (n, p); - - Postorder a tree. - Input: - p is a vector of parent pointers for a forest whose - vertices are the integers 0 to n-1; p[root]==n. - Output: - q is a vector indexed by 0..n-1 such that q[i] is the - i-th vertex in a postorder numbering of the tree. - - ( 2/7/95 modified by X.Li: - q is a vector indexed by 0:n-1 such that vertex i is the - q[i]-th vertex in a postorder numbering of the tree. - That is, this is the inverse of the previous q. ) - - In the child structure, lower-numbered children are represented - first, so that a tree which is already numbered in postorder - will not have its order changed. - - Written by John Gilbert, Xerox, 10 Dec 1990. - Based on code written by John Gilbert at CMI in 1987. --
-
|
- -static | -
-
|
- -static | -
-
|
- -static | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions/Subroutines | |
program | f_pddrive_abglobal |
program f_pddrive_abglobal | -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
► EXAMPLE | |
► FORTRAN | |
► SRC | |
► TEST |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Gets matrix permutation. -More...
--Macros | |
#define | METISOPTIONS 40 |
-Functions | |
void | get_metis (int_t n, int_t bnz, int_t *b_colptr, int_t *b_rowind, int_t *perm_c) |
void | get_colamd_dist (const int m, const int n, const int nnz, int_t *colptr, int_t *rowind, int_t *perm_c) |
void | getata_dist (const int_t m, const int_t n, const int_t nz, int_t *colptr, int_t *rowind, int_t *atanz, int_t **ata_colptr, int_t **ata_rowind) |
void | at_plus_a_dist (const int_t n, const int_t nz, int_t *colptr, int_t *rowind, int_t *bnz, int_t **b_colptr, int_t **b_rowind) |
void | get_perm_c_dist (int_t pnum, int_t ispec, SuperMatrix *A, int_t *perm_c) |
Gets matrix permutation.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 2.1) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley, -November 1, 2007 -February 20, 2008 -
Last update: 7/27/2011 fix a bug with metis ordering on empty graph.
-#define METISOPTIONS 40 | -
void at_plus_a_dist | -( | -const int_t | -n, | -
- | - | const int_t | -nz, | -
- | - | int_t * | -colptr, | -
- | - | int_t * | -rowind, | -
- | - | int_t * | -bnz, | -
- | - | int_t ** | -b_colptr, | -
- | - | int_t ** | -b_rowind | -
- | ) | -- |
-Purpose -======= - -Form the structure of A'+A. A is an n-by-n matrix in column oriented -format represented by (colptr, rowind). The output A'+A is in column -oriented format (symmetrically, also row oriented), represented by -(b_colptr, b_rowind). -
void get_colamd_dist | -( | -const int | -m, | -
- | - | const int | -n, | -
- | - | const int | -nnz, | -
- | - | int_t * | -colptr, | -
- | - | int_t * | -rowind, | -
- | - | int_t * | -perm_c | -
- | ) | -- |
void get_metis | -( | -int_t | -n, | -
- | - | int_t | -bnz, | -
- | - | int_t * | -b_colptr, | -
- | - | int_t * | -b_rowind, | -
- | - | int_t * | -perm_c | -
- | ) | -- |
void get_perm_c_dist | -( | -int_t | -pnum, | -
- | - | int_t | -ispec, | -
- | - | SuperMatrix * | -A, | -
- | - | int_t * | -perm_c | -
- | ) | -- |
-Purpose -======= - -GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple -minimum degree ordering code by Joseph Liu to matrix A'*A or A+A', -or using approximate minimum degree column ordering by Davis et. al. -The LU factorization of A*Pc tends to have less fill than the LU -factorization of A. - -Arguments -========= - -ispec (input) colperm_t - Specifies what type of column permutation to use to reduce fill. - = NATURAL: natural ordering (i.e., Pc = I) - = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A - = MMD_ATA: minimum degree ordering on structure of A'*A - = METIS_AT_PLUS_A: MeTis on A'+A - -A (input) SuperMatrix* - Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number - of the linear equations is A->nrow. Currently, the type of A - can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE. - In the future, more general A can be handled. - -perm_c (output) int* - Column permutation vector of size A->ncol, which defines the - permutation matrix Pc; perm_c[i] = j means column i of A is - in position j in A*Pc. -
void getata_dist | -( | -const int_t | -m, | -
- | - | const int_t | -n, | -
- | - | const int_t | -nz, | -
- | - | int_t * | -colptr, | -
- | - | int_t * | -rowind, | -
- | - | int_t * | -atanz, | -
- | - | int_t ** | -ata_colptr, | -
- | - | int_t ** | -ata_rowind | -
- | ) | -- |
-Purpose -======= - -Form the structure of A'*A. A is an m-by-n matrix in column oriented -format represented by (colptr, rowind). The output A'*A is in column -oriented format (symmetrically, also row oriented), represented by -(ata_colptr, ata_rowind). - -This routine is modified from GETATA routine by Tim Davis. -The complexity of this algorithm is: SUM_{i=1,m} r(i)^2, -i.e., the sum of the square of the row counts. - -Questions -========= - o Do I need to withhold the *dense* rows? - o How do I know the number of nonzeros in A'*A? -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Gets matrix permutation. -More...
-#include <limits.h>
#include <math.h>
#include "superlu_dist_config.h"
#include "parmetis.h"
#include "superlu_ddefs.h"
-Functions | |
static float | a_plus_at_CompRow_loc (int, int_t *, int, int_t *, int_t, int_t *, int_t *, int, int_t *, int_t *, int_t **, int_t **, gridinfo_t *) |
float | get_perm_c_parmetis (SuperMatrix *A, int_t *perm_r, int_t *perm_c, int nprocs_i, int noDomains, int_t **sizes, int_t **fstVtxSep, gridinfo_t *grid, MPI_Comm *metis_comm) |
Gets matrix permutation.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed symbolic factorization auxiliary routine (version 2.1) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley - July 2003 -INRIA France - January 2004 -Laura Grigori - -November 1, 2007 -
-
|
- -static | -
-Purpose -======= - -Form the structure of Pr*A + A'Pr'. A is an n-by-n matrix in -NRformat_loc format, represented by (rowptr, colind). The output -B = Pr*A + A'Pr' is in NRformat_loc format (symmetrically, also row -oriented), represented by (b_rowptr, b_colind). - -The input matrix A is distributed in block row format on nprocs_i -processors. The output matrix B is distributed in block row format -on nprocs_o processors, where nprocs_o <= nprocs_i. On output, the -matrix B has its rows permuted according to perm_r. - -Sketch of the algorithm -======================= - -Let iam be my process number. Let fst_row, lst_row = m_loc + -fst_row be the first/last row stored on iam. - -Compute Pr' - the inverse row permutation, stored in iperm_r. - -Compute the transpose of the block row of Pr*A that iam owns: - T[:,Pr(fst_row:lst_row)] = Pr' * A[:,fst_row:lst_row] * Pr' - - -All to all communication such that every processor iam receives all -the blocks of the transpose matrix that it needs, that is - T[fst_row:lst_row, :] - -Compute B = A[fst_row:lst_row, :] + T[fst_row:lst_row, :] - -If Pr != I or nprocs_i != nprocs_o then permute the rows of B (that -is compute Pr*B) and redistribute from nprocs_i to nprocs_o -according to the block row distribution in vtxdist_i, vtxdist_o. -
float get_perm_c_parmetis | -( | -SuperMatrix * | -A, | -
- | - | int_t * | -perm_r, | -
- | - | int_t * | -perm_c, | -
- | - | int | -nprocs_i, | -
- | - | int | -noDomains, | -
- | - | int_t ** | -sizes, | -
- | - | int_t ** | -fstVtxSep, | -
- | - | gridinfo_t * | -grid, | -
- | - | MPI_Comm * | -metis_comm | -
- | ) | -- |
-Purpose -======= - -GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a -graph partitioning algorithm to the symmetrized graph A+A'. The -multilevel graph partitioning algorithm used is the -ParMETIS_V3_NodeND routine available in the parallel graph -partitioning package parMETIS. - -The number of independent sub-domains noDomains computed by this -algorithm has to be a power of 2. Hence noDomains is the largest -power of 2 that is no larger than nprocs_i, where nprocs_i = nprow -* npcol is the number of processors used in SuperLU_DIST. - -Arguments -========= - -A (input) SuperMatrix* - Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number - of the linear equations is A->nrow. Matrix A is distributed - in NRformat_loc format. - -perm_r (input) int_t* - Row permutation vector of size A->nrow, which defines the - permutation matrix Pr; perm_r[i] = j means row i of A is in - position j in Pr*A. - -perm_c (output) int_t* - Column permutation vector of size A->ncol, which defines the - permutation matrix Pc; perm_c[i] = j means column i of A is - in position j in A*Pc. - -nprocs_i (input) int - Number of processors the input matrix is distributed on in a block - row format. It corresponds to number of processors used in - SuperLU_DIST. - -noDomains (input) int, must be power of 2 - Number of independent domains to be computed by the graph - partitioning algorithm. ( noDomains <= nprocs_i ) - -sizes (output) int_t**, of size 2 * noDomains - Returns pointer to an array containing the number of nodes - for each sub-domain and each separator. Separators are stored - from left to right. - Memory for the array is allocated in this routine. - -fstVtxSep (output) int_t**, of size 2 * noDomains - Returns pointer to an array containing first node for each - sub-domain and each separator. - Memory for the array is allocated in this routine. - -Return value -============ - < 0, number of bytes allocated on return from the symbolic factorization. 
- > 0, number of bytes allocated when out of memory. -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
-Functions | |
void | DisplayHeader () |
const char * | gpublasGetErrorString (gpublasStatus_t status) |
gpuError_t | checkGPU (gpuError_t result) |
gpublasStatus_t | checkGPUblas (gpublasStatus_t result) |
gpublasHandle_t | create_handle () |
void | destroy_handle (gpublasHandle_t handle) |
void | printGPUStats (int nsupers, SuperLUStat_t *stat, gridinfo3d_t *grid3d) |
Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-gpuError_t checkGPU | -( | -gpuError_t | -result | ) | -- |
gpublasStatus_t checkGPUblas | -( | -gpublasStatus_t | -result | ) | -- |
gpublasHandle_t create_handle | -( | -) | -- |
void destroy_handle | -( | -gpublasHandle_t | -handle | ) | -- |
void DisplayHeader | -( | -) | -- |
const char* gpublasGetErrorString | -( | -gpublasStatus_t | -status | ) | -- |
void printGPUStats | -( | -int | -nsupers, | -
- | - | SuperLUStat_t * | -stat, | -
- | - | gridinfo3d_t * | -grid3d | -
- | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Go to the source code of this file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 4.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -October 1, 2014 -Modified: - May 22, 2022 version 8.0.0 -
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Wrappers for multiple types of GPUs. -More...
- -Go to the source code of this file.
--Macros | |
#define | gpublasCheckErrors(fn) |
Wrappers for multiple types of GPUs.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
---- Distributed SuperLU routine (version 8.0) -- -Lawrence Berkeley National Lab, Univ. of California Berkeley. -May 22, 2022 -
#define gpublasCheckErrors | -( | -- | fn | ) | -- |
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
This page explains how to interpret the graphs that are generated by doxygen.
-Consider the following example:
This will result in the following graph:
-The boxes in the above graph have the following meaning:
-The arrows have the following meaning:
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
Go to the source code of this file.
-Copyright (c) 2003, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from U.S. Dept. of Energy)
-All rights reserved.
-The source code is distributed under BSD license, see the file License.txt at the top-level directory.
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
 SuperLU_DIST is a general purpose distributed-memory parallel library for the direct solution of large, sparse, nonsymmetric systems of linear equations. The library is written in C and is callable from either a C or Fortran program. It uses MPI and OpenMP to support various forms of parallelism, and is GPU capable (CUDA, HIP, ...). It supports both real and complex datatypes, both single and double precision, and 64-bit integer indexing. The library routines perform an LU decomposition with static pivoting and triangular system solves through forward and back substitution. The LU factorization routines can handle non-square matrices but the triangular solves are performed only for square matrices.
-The matrix may be preordered (before factorization) either through library or user supplied routines. This preordering for sparsity is completely separate from the factorization. Working precision or extra precision iterative refinement subroutines are provided for improved backward stability and forward accuracy. Routines are also provided to equilibrate the system, calculate the relative backward error, and estimate error bounds for the refined solutions.
-The SuperLU main web site is https://portal.nersc.gov/project/sparse/superlu/
-
- SuperLU Distributed
- 8.1.0
-
- gpu3d-batch
- |
-
#include "lupanels.hpp"