-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Integrate Modified AC-SpGEMM / GALATIC #26
base: master
Are you sure you want to change the base?
Changes from all commits
b9bd4bc
179666b
5080cc3
4cb56c7
ca71c72
b9e075c
2c7bdb9
99c74ba
04138ae
50fa929
ce4a855
dac2a22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[submodule "ext/moderngpu"] | ||
path = ext/moderngpu | ||
url = https://[email protected]/ctcyang/moderngpu.git | ||
[submodule "ext/cub"] | ||
path = ext/cub | ||
url = https://[email protected]/NVlabs/cub.git | ||
[submodule "ext/GALATIC"] | ||
path = ext/GALATIC | ||
url = https://[email protected]/richardlett/GALATIC.git |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ set( PROJ_PATH ${CMAKE_SOURCE_DIR}) | |
set( PROJ_OUT_PATH ${CMAKE_BINARY_DIR}) | ||
set( PROJ_HEADERS "" ) | ||
set( PROJ_LIBRARIES "" ) | ||
set( PROJ_INCLUDES "./" "ext/moderngpu/include" "ext/cub/cub") | ||
set( PROJ_INCLUDES "./" "ext/moderngpu/include" "ext/GALATIC") | ||
set( mgpu_SRC_FILES "ext/moderngpu/src/mgpucontext.cu" "ext/moderngpu/src/mgpuutil.cpp") | ||
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin ) | ||
#set( CUDA_CURAND_LIBRARY "$ENV{CUDA_HOME}/lib64/libcurand.so" ) | ||
|
@@ -23,12 +23,12 @@ set( CUDA_CUSPARSE_LIBRARY "$ENV{CUDA_HOME}/lib64/libcusparse.so" ) | |
#FILE( GLOB_RECURSE PROJ_LIBRARIES ext/cublas1.1/*.cu ) | ||
FILE( GLOB_RECURSE PROJ_HEADERS graphblas/*.hpp) | ||
# nvcc flags | ||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_35 -lineinfo -O3 -use_fast_math -Xptxas=-v") | ||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_35 -lineinfo -O3 -use_fast_math -Xptxas=-v --expt-relaxed-constexpr ") | ||
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-fpermissive;-arch=sm_35;-lineinfo;-Xptxas=-v;-dlcm=ca;-maxrregcount=64) | ||
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_21) | ||
# needed for cudamalloc | ||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") | ||
set(CMAKE_CXX_FLAGS "-fpermissive -g -m64 -std=c++11" ) | ||
set(CMAKE_CXX_FLAGS "-fpermissive -g -std=c++14" ) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one space instead of two between |
||
#set(CMAKE_CXX_FLAGS "-fpermissive -pg -m64 -std=c++11" ) | ||
#set(CMAKE_CXX_FLAGS "-fpermissive -g -m64 -std=c++11 -H" ) | ||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ include common.mk | |
#------------------------------------------------------------------------------- | ||
|
||
# Includes | ||
INC += -I$(MGPU_DIR) -I$(CUB_DIR) -I$(BOOST_DIR) -I$(GRB_DIR) | ||
INC += -I$(MGPU_DIR) -I$(BOOST_DIR) -I$(GRB_DIR) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one space instead of two between |
||
|
||
#------------------------------------------------------------------------------- | ||
# Dependency Lists | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
#ifndef GRAPHBLAS_BACKEND_CUDA_SPGEMM_HPP_ | ||
#define GRAPHBLAS_BACKEND_CUDA_SPGEMM_HPP_ | ||
|
||
#include "GALATICMinimumIncludes.cuh" | ||
|
||
#include "graphblas/backend/cuda/sparse_matrix.hpp" | ||
|
||
#include <cuda.h> | ||
|
@@ -108,6 +110,182 @@ Info spgemmMasked(SparseMatrix<c>* C, | |
C->csc_initialized_ = false; | ||
return GrB_SUCCESS; | ||
} | ||
// Shallow copy graphblast sparsematrix -> Galatic dCSR format | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: please add a newline between following template function and above function. |
||
template<typename T> | ||
static void matrixToGalatic(const SparseMatrix<T> *input , dCSR<T>& output) { | ||
output.col_ids = reinterpret_cast<unsigned int*>(input->d_csrColInd_); | ||
output.data = input->d_csrVal_; | ||
output.row_offsets = reinterpret_cast<unsigned int*>(input->d_csrRowPtr_); | ||
output.rows = input->nrows_; | ||
output.cols = input->ncols_; | ||
output.nnz = input->nvals_; | ||
} | ||
|
||
// Shallow copy Galatic dCSR format -> graphblast sparsematrix | ||
template<typename T> | ||
static void galaticToSparse(SparseMatrix<T> *output , const dCSR<T>& input) { | ||
output->d_csrColInd_ = reinterpret_cast<Index*>(input.col_ids); | ||
output->d_csrVal_ = input.data; | ||
output->d_csrRowPtr_ = reinterpret_cast<Index*>(input.row_offsets); | ||
output->nvals_ = input.nnz; | ||
output->ncapacity_ = input.nnz; | ||
} | ||
|
||
// Nullize pointers in Galatic's sparse matrices; | ||
// Galatic's destructors check for null. Doing this will prevent double | ||
// freeing when shallowcopying with matrixToGalatic & galaticToSparse | ||
template<typename T> | ||
static void nullizeGalaticMatrix(dCSR<T>& m) { | ||
m.data = nullptr; | ||
m.col_ids = nullptr; | ||
m.row_offsets = nullptr; | ||
} | ||
|
||
// A generic shim between graphblast's and GALATIC's semiring interfaces | ||
template<typename NativeSR, typename a, typename b, typename c> | ||
struct GalaticSemiring : SemiRing<a, b, c> { | ||
NativeSR nativeSemiring; | ||
|
||
__device__ c multiply(const a& left, const b& right) const | ||
{ return nativeSemiring.mul_op(left, right); } | ||
__device__ c add(const c& left,const c& right) const | ||
{ return nativeSemiring.add_op(left, right); } | ||
__device__ static c AdditiveIdentity() | ||
{ return NativeSR::identity(); } | ||
}; | ||
|
||
template <typename c, typename a, typename b, typename SemiringT> | ||
Info GALATIC_spgemm(SparseMatrix<c>* C, | ||
SemiringT op, | ||
const SparseMatrix<a>* A, | ||
const SparseMatrix<b>* B, | ||
Descriptor* desc) { | ||
|
||
Index A_nrows, A_ncols, A_nvals; | ||
Index B_nrows, B_ncols, B_nvals; | ||
Index C_nrows, C_ncols, C_nvals; | ||
|
||
A_nrows = A->nrows_; | ||
A_ncols = A->ncols_; | ||
A_nvals = A->nvals_; | ||
B_nrows = B->nrows_; | ||
B_ncols = B->ncols_; | ||
B_nvals = B->nvals_; | ||
C_nrows = C->nrows_; | ||
C_ncols = C->ncols_; | ||
|
||
// Dimension compatibility check | ||
if ((A_ncols != B_nrows) || (C_ncols != B_ncols) || (C_nrows != A_nrows)) { | ||
std::cout << "Dim mismatch mxm" << std::endl; | ||
std::cout << A_ncols << " " << B_nrows << std::endl; | ||
std::cout << C_ncols << " " << B_ncols << std::endl; | ||
std::cout << C_nrows << " " << A_nrows << std::endl; | ||
return GrB_DIMENSION_MISMATCH; | ||
} | ||
|
||
if (C->d_csrColInd_ != NULL) { | ||
CUDA_CALL(cudaFree(C->d_csrColInd_)); | ||
CUDA_CALL(cudaFree(C->d_csrVal_)); | ||
C->d_csrColInd_ = NULL; | ||
C->d_csrVal_ = NULL; | ||
} | ||
|
||
if (C->d_csrRowPtr_ != NULL) { | ||
CUDA_CALL(cudaFree(C->d_csrRowPtr_)); | ||
C->d_csrRowPtr_ = NULL; | ||
} | ||
|
||
if (C->h_csrColInd_ != NULL) { | ||
free(C->h_csrColInd_); | ||
free(C->h_csrVal_); | ||
C->h_csrColInd_ = NULL; | ||
C->h_csrVal_ = NULL; | ||
} | ||
|
||
dCSR<c> outMatrixGPU; | ||
dCSR<a> leftInputMatrixGPU; | ||
dCSR<b> rightInputMatrixGPU; | ||
|
||
//shallow copy input matrices to galatic format | ||
matrixToGalatic(A, leftInputMatrixGPU); | ||
matrixToGalatic(B, rightInputMatrixGPU); | ||
|
||
GPUMatrixMatrixMultiplyTraits DefaultTraits; | ||
|
||
// GALATIC has its own semiring interface; | ||
// GalaticSemiring is a shim here for conversion of graphblast-style | ||
// SemiringT type. GalaticSemiring definition is above this function | ||
GalaticSemiring<SemiringT, a, b, c> semiring_shim; | ||
semiring_shim.nativeSemiring = op; | ||
|
||
ExecutionStats stats; | ||
try { | ||
Desc_value nt_mode; | ||
CHECK(desc->get(GrB_NT, &nt_mode)); | ||
const int num_threads = static_cast<int>(nt_mode); | ||
|
||
switch (num_threads) { | ||
case 64: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
64, 4, 2, 8, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
case 128: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
128, 4, 2, 4, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
( leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
case 512: | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
512, 1, 1, 1, 2, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
default: // 256 | ||
ACSpGEMM::MultiplyImplementation<GalaticSemiring<SemiringT, a, b, c>, | ||
256, 4, 2, 4, 4, 16, 512, 8, 0, a, b, c, | ||
GalaticSemiring<SemiringT, a, b, c>> | ||
(leftInputMatrixGPU, rightInputMatrixGPU, | ||
outMatrixGPU, DefaultTraits, stats, semiring_shim); | ||
break; | ||
} | ||
} catch(std::exception& e) { | ||
std::cerr | ||
<< "Exception occured in GALATIC SpGEMM, called from GALATIC_spgemm\n" | ||
<< "Exception:\n" | ||
<< e.what() | ||
<< std::endl; | ||
return GrB_OUT_OF_MEMORY; //the most likely issue, fixme | ||
} | ||
|
||
// shallow copy to native format. | ||
galaticToSparse(C , outMatrixGPU); | ||
|
||
// prevent allocations being freed twice when destructors are ran, | ||
// as we are doing shallow copies: | ||
// | ||
// A, B -> leftInputMatrixGPU, rightInputMatrixGPU | ||
// outMatrixGPU -> C. | ||
nullizeGalaticMatrix(outMatrixGPU); | ||
nullizeGalaticMatrix(leftInputMatrixGPU); | ||
nullizeGalaticMatrix(rightInputMatrixGPU); | ||
|
||
if (C->h_csrRowPtr_ == NULL) | ||
C->h_csrRowPtr_ = reinterpret_cast<Index*>(malloc((A_nrows+1)* | ||
sizeof(Index))); | ||
C->h_csrColInd_ = reinterpret_cast<Index*>(malloc(C->ncapacity_*sizeof(Index))); | ||
C->h_csrVal_ = reinterpret_cast<c*>(malloc(C->ncapacity_*sizeof(c))); | ||
|
||
C->need_update_ = true; // Set flag that we need to copy data from GPU | ||
C->csr_initialized_ = true; | ||
C->csc_initialized_ = false; | ||
return GrB_SUCCESS; | ||
} | ||
|
||
template <typename c, typename a, typename b, typename m, | ||
typename BinaryOpT, typename SemiringT> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: Do you mind changing both of these submodule URLs to a consistent format such as
https://github.com/ctcyang/moderngpu.git
andhttps://github.com/richardlett/GALATIC.git
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed them both to ssh as that will be more universal until repo is public (automatically uses your ssh-key)