From 6c8ff4e21fc498a48da54fa9191cddfb67b0004a Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 26 Jul 2018 04:23:51 +0800 Subject: [PATCH] Re-structure. * Put KmMatrix under gpu/matrix * Use shared utils for both kmeans and KmMatrix. * Rename the namespaces. --- CMakeLists.txt | 3 +- src/common/utils.h | 33 ++++++++ src/gpu/kmeans/KmMatrix/KmConfig.h | 81 ------------------- src/gpu/kmeans/KmMatrix/utils.cuh | 69 ---------------- src/gpu/kmeans/kmeans_general.h | 3 +- src/gpu/kmeans/kmeans_h2o4gpu.cu | 6 +- src/gpu/kmeans/kmeans_init.cu | 32 ++++---- src/gpu/kmeans/kmeans_init.cuh | 21 ++--- src/gpu/kmeans/kmeans_labels.h | 37 ++------- src/gpu/{kmeans => matrix}/KmMatrix/Arith.cu | 18 +++-- src/gpu/{kmeans => matrix}/KmMatrix/Arith.hpp | 10 +-- .../{kmeans => matrix}/KmMatrix/Generator.cuh | 20 ++--- .../{kmeans => matrix}/KmMatrix/Generator.hpp | 8 +- .../KmMatrix/GeneratorKernels.cu | 8 +- .../{kmeans => matrix}/KmMatrix/KmMatrix.cpp | 26 +++--- .../{kmeans => matrix}/KmMatrix/KmMatrix.hpp | 15 ++-- .../KmMatrix/KmMatrixCuda.cu | 15 ++-- .../KmMatrix/KmMatrixCuda.cuh | 8 +- .../KmMatrix/KmMatrixProxy.cpp | 8 +- src/gpu/{kmeans => matrix}/KmMatrix/blas.cuh | 34 ++++---- .../{kmeans/KmMatrix => utils}/GpuInfo.cuh | 11 ++- src/gpu/utils/utils.cuh | 75 ++++++++++++----- tests/cpp/gpu/KmMatrix/test_arith.cu | 6 +- tests/cpp/gpu/KmMatrix/test_matrix.cu | 44 +++++----- tests/cpp/gpu/KmMatrix/test_proxy.cu | 14 ++-- tests/cpp/gpu/kmeans/test_kmeans_init.cu | 16 ++-- 26 files changed, 263 insertions(+), 358 deletions(-) delete mode 100644 src/gpu/kmeans/KmMatrix/KmConfig.h delete mode 100644 src/gpu/kmeans/KmMatrix/utils.cuh rename src/gpu/{kmeans => matrix}/KmMatrix/Arith.cu (94%) rename src/gpu/{kmeans => matrix}/KmMatrix/Arith.hpp (90%) rename src/gpu/{kmeans => matrix}/KmMatrix/Generator.cuh (84%) rename src/gpu/{kmeans => matrix}/KmMatrix/Generator.hpp (80%) rename src/gpu/{kmeans => matrix}/KmMatrix/GeneratorKernels.cu (95%) rename src/gpu/{kmeans => matrix}/KmMatrix/KmMatrix.cpp (95%) rename src/gpu/{kmeans => matrix}/KmMatrix/KmMatrix.hpp (94%) rename src/gpu/{kmeans => matrix}/KmMatrix/KmMatrixCuda.cu (96%) rename src/gpu/{kmeans => matrix}/KmMatrix/KmMatrixCuda.cuh (95%) rename src/gpu/{kmeans => matrix}/KmMatrix/KmMatrixProxy.cpp (95%) rename src/gpu/{kmeans => matrix}/KmMatrix/blas.cuh (94%) rename src/gpu/{kmeans/KmMatrix => utils}/GpuInfo.cuh (88%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 70e8dd524..c696cad27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,8 +94,7 @@ if(USE_CUDA) FILE(GLOB_RECURSE GPU_SOURCES src/*.cu src/*.cuh - src/gpu/kmeans/KmMatrix/*.cpp - src/gpu/kmeans/KmMatrix/*.hpp + src/gpu/matrix/*.cpp src/common/*.cpp src/common/*.h) diff --git a/src/common/utils.h b/src/common/utils.h index 10c97ac03..a2fb9ef7b 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -4,8 +4,14 @@ */ #pragma once #include + +#include +#include + #include "cblas/cblas.h" +#define USE_CUDA() 1 + template void self_dot(std::vector array_in, int n, int dim, std::vector& dots); @@ -19,3 +25,30 @@ void compute_distances(std::vector data_in, std::vector centroids_in, std::vector &pairwise_distances, int n, int dim, int k); + +// Matrix host dev +#define HG_HOSTDEV __host__ __device__ +#define HG_DEV __device__ +#define HG_DEVINLINE __device__ __forceinline__ +#define HG_HOSTDEVINLINE __host__ __device__ __forceinline__ + +#define h2o4gpu_error(x) error(x, __FILE__, __LINE__); + +inline void error(const char* e, const char* file, int line) +{ + std::stringstream ss; + ss << e << " - " << file << "(" << line << ")"; + //throw error_text; + std::cerr << ss.str() << std::endl; + exit(-1); +} + +#define h2o4gpu_check(condition, msg) check(condition, msg, __FILE__, __LINE__); + +inline void check(bool val, const char* e, const char* file, int line) +{ + if (!val) + { + error(e, file, line); + } +} diff --git a/src/gpu/kmeans/KmMatrix/KmConfig.h b/src/gpu/kmeans/KmMatrix/KmConfig.h deleted file mode 100644 index fa718a221..000000000 --- a/src/gpu/kmeans/KmMatrix/KmConfig.h +++ /dev/null @@ -1,81 +0,0 @@ -/*! - * Copyright 2018 H2O.ai, Inc. - * License Apache License Version 2.0 (see LICENSE for details) - */ - -#ifndef KM_CONFIG_H_ -#define KM_CONFIG_H_ - -#define USE_CUDA() 1 - -#include "stdio.h" - -// Matrix host dev -#define M_HOSTDEV __host__ __device__ -#define M_DEV __device__ -#define M_DEVINLINE __device__ __forceinline__ -#define M_HOSTDEVINLINE __host__ __device__ __forceinline__ - -#define CUDA_CHECK(cmd) do { \ - cudaError_t e = cmd; \ - if( e != cudaSuccess ) { \ - printf("Cuda failure %s:%d '%s'\n", \ - __FILE__,__LINE__,cudaGetErrorString(e)); \ - fflush( stdout ); \ - exit(EXIT_FAILURE); \ - } \ - } while(0) - -#define CUBLAS_CHECK(cmd) do { \ - cublasStatus_t status = cmd; \ - if ( status != CUBLAS_STATUS_SUCCESS) { \ - const char* errmsg = nullptr; \ - switch(status) { \ - case CUBLAS_STATUS_NOT_INITIALIZED: \ - errmsg = "library not initialized"; \ - break; \ - \ - case CUBLAS_STATUS_ALLOC_FAILED: \ - errmsg = "resource allocation failed"; \ - break; \ - \ - case CUBLAS_STATUS_INVALID_VALUE: \ - errmsg = "an invalid numeric value was used as an argument"; \ - break; \ - \ - case CUBLAS_STATUS_ARCH_MISMATCH: \ - errmsg = "an absent device architectural feature is required"; \ - break; \ - \ - case CUBLAS_STATUS_MAPPING_ERROR: \ - errmsg = "an access to GPU memory space failed"; \ - break; \ - \ - case CUBLAS_STATUS_EXECUTION_FAILED: \ - errmsg = "the GPU program failed to execute"; \ - break; \ - \ - case CUBLAS_STATUS_INTERNAL_ERROR: \ - errmsg = "an internal operation failed"; \ - break; \ - \ - default: \ - errmsg = "unknown error"; \ - break; \ - } \ - printf("%s", errmsg); \ - } \ - } while (false) - -#define M_ERROR(msg) \ - printf("%s\n\t in %s, %u, %s\n", msg, __FILE__, __LINE__, \ - __PRETTY_FUNCTION__); \ - abort(); - -#define M_USER_ERROR(msg) \ - fprintf(stderr, \ - "%s\n\t in %s, %u, %s\n", msg, __FILE__, __LINE__, \ - __PRETTY_FUNCTION__); \ - exit(1) - -#endif // KM_CONFIG_H_ diff --git a/src/gpu/kmeans/KmMatrix/utils.cuh b/src/gpu/kmeans/KmMatrix/utils.cuh deleted file mode 100644 index 606359fda..000000000 --- a/src/gpu/kmeans/KmMatrix/utils.cuh +++ /dev/null @@ -1,69 +0,0 @@ -/*! - * Copyright 2018 H2O.ai, Inc. - * License Apache License Version 2.0 (see LICENSE for details) - */ - -#ifndef UTILS_CUH_ -#define UTILS_CUH_ - -#include "GpuInfo.cuh" - -namespace H2O4GPU { -namespace KMeans { - -M_DEVINLINE size_t global_thread_idx () { - return threadIdx.x + blockIdx.x * blockDim.x; -} - -M_DEVINLINE size_t global_thread_idy () { - return threadIdx.y + blockIdx.y * blockDim.y; -} - -M_DEVINLINE size_t grid_stride_x () { - return blockDim.x * gridDim.x; -} - -M_DEVINLINE size_t grid_stride_y () { - return blockDim.y * gridDim.y; -} - -template -T1 M_HOSTDEVINLINE div_roundup(const T1 a, const T2 b) { - return static_cast(ceil(static_cast(a) / b)); -} - - -// Work around for shared memory -// https://stackoverflow.com/questions/20497209/getting-cuda-error-declaration-is-incompatible-with-previous-variable-name -template -struct KmSharedMem; - -template <> -struct KmSharedMem { - __device__ float * ptr() { - extern __shared__ __align__(sizeof(float)) float s_float[]; - return s_float; - } -}; - -template <> -struct KmSharedMem { - __device__ double * ptr() { - extern __shared__ __align__(sizeof(double)) double s_double[]; - return s_double; - } -}; - -template <> -struct KmSharedMem { - __device__ int * ptr() { - extern __shared__ __align__(sizeof(int)) int s_int[]; - return s_int; - } -}; - - -} // KMeans -} // H2O4GPU - -#endif // UTILS_CUH_ \ No newline at end of file diff --git a/src/gpu/kmeans/kmeans_general.h b/src/gpu/kmeans/kmeans_general.h index 9697a48e3..d32dcbce8 100644 --- a/src/gpu/kmeans/kmeans_general.h +++ b/src/gpu/kmeans/kmeans_general.h @@ -4,6 +4,7 @@ */ #pragma once #include "../../common/logger.h" +#include "../utils/utils.cuh" #include "stdio.h" #define MAX_NGPUS 16 @@ -13,8 +14,6 @@ // TODO(pseudotensor): Avoid throw for python exception handling. Need to avoid all exit's and return exit code all the way back. #define gpuErrchk(ans) { gpu_assert((ans), __FILE__, __LINE__); } -#define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__); -#define safe_cublas(ans) throw_on_cublas_error((ans), __FILE__, __LINE__); #define CUDACHECK(cmd) do { \ cudaError_t e = cmd; \ diff --git a/src/gpu/kmeans/kmeans_h2o4gpu.cu b/src/gpu/kmeans/kmeans_h2o4gpu.cu index a07fd5087..224070503 100644 --- a/src/gpu/kmeans/kmeans_h2o4gpu.cu +++ b/src/gpu/kmeans/kmeans_h2o4gpu.cu @@ -774,9 +774,9 @@ int kmeans_fit(int verbose, int seed, int gpu_idtry, int n_gputry, thrust::device, data[i]->begin(), data[i]->end(), h_init_data.begin()); } - H2O4GPU::KMeans::KmMatrix init_data(h_init_data, rows, cols); - H2O4GPU::KMeans::KmMatrix final_centroids_matrix = - H2O4GPU::KMeans::KmeansLlInit(seed, 1.5)(init_data, k); + h2o4gpu::kMeans::KmMatrix init_data(h_init_data, rows, cols); + h2o4gpu::kMeans::KmMatrix final_centroids_matrix = + h2o4gpu::kMeans::KmeansLlInit(seed, 1.5)(init_data, k); thrust::host_vector final_centroids (final_centroids_matrix.size()); thrust::copy( final_centroids_matrix.dev_ptr(), diff --git a/src/gpu/kmeans/kmeans_init.cu b/src/gpu/kmeans/kmeans_init.cu index 3a93cc138..091023a9c 100644 --- a/src/gpu/kmeans/kmeans_init.cu +++ b/src/gpu/kmeans/kmeans_init.cu @@ -15,14 +15,16 @@ #include "kmeans_init.cuh" -#include "KmMatrix/KmMatrix.hpp" -#include "KmMatrix/Arith.hpp" -#include "KmMatrix/utils.cuh" -#include "KmMatrix/GpuInfo.cuh" -#include "KmMatrix/blas.cuh" +#include "../matrix/KmMatrix/KmMatrix.hpp" +#include "../matrix/KmMatrix/Arith.hpp" +#include "../matrix/KmMatrix/blas.cuh" +#include "../utils/utils.cuh" +#include "../utils/GpuInfo.cuh" -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace kMeans { + +using namespace Matrix; namespace kernel { // X^2 + Y^2, here only calculates the + operation. @@ -116,7 +118,7 @@ KmMatrix PairWiseDistanceOp::operator()(KmMatrix& _data, data_dot_.k_param(), centroids_dot_.k_param()); - CUDA_CHECK(cudaGetLastError()); + safe_cuda(cudaGetLastError()); cublasHandle_t handle = GpuInfo::ins().cublas_handle(); @@ -191,7 +193,7 @@ KmMatrix GreedyRecluster::recluster(KmMatrix& _centroids, size_t _k) { void *d_temp_storage = NULL; // determine the temp_storage_bytes - CUDA_CHECK(cub::DeviceHistogram::HistogramEven( + safe_cuda(cub::DeviceHistogram::HistogramEven( d_temp_storage, temp_storage_bytes, min_indices.dev_ptr(), weights.dev_ptr(), @@ -200,8 +202,8 @@ KmMatrix GreedyRecluster::recluster(KmMatrix& _centroids, size_t _k) { (T)min_indices.rows(), (int)_centroids.rows())); - CUDA_CHECK(cudaMalloc((void**)&d_temp_storage, temp_storage_bytes)); - CUDA_CHECK(cub::DeviceHistogram::HistogramEven( + safe_cuda(cudaMalloc((void**)&d_temp_storage, temp_storage_bytes)); + safe_cuda(cub::DeviceHistogram::HistogramEven( d_temp_storage, temp_storage_bytes, min_indices.dev_ptr(), // d_samples weights.dev_ptr(), // d_histogram @@ -209,7 +211,7 @@ KmMatrix GreedyRecluster::recluster(KmMatrix& _centroids, size_t _k) { (T)0.0, // lower_level (T)min_indices.rows(), // upper_level (int)_centroids.rows())); // num_samples - CUDA_CHECK(cudaFree(d_temp_storage)); + safe_cuda(cudaFree(d_temp_storage)); // Sort the indices by weights in ascending order, then use those at front // as result. @@ -352,7 +354,7 @@ KmeansLlInit::operator()(KmMatrix& _data, size_t _k) { "k must be less than or equal to the number of data points" ", k: %lu, data points: %lu", _k, _data.rows()); - M_USER_ERROR(err_msg); + h2o4gpu_error(err_msg); } if (seed_ < 0) { @@ -433,5 +435,5 @@ INSTANTIATE(int) #undef INSTANTIATE } -} // namespace Kmeans -} // namespace H2O4GPU +} // namespace kMeans +} // namespace h2o4gpu diff --git a/src/gpu/kmeans/kmeans_init.cuh b/src/gpu/kmeans/kmeans_init.cuh index 6f067fc9f..28a055c2a 100644 --- a/src/gpu/kmeans/kmeans_init.cuh +++ b/src/gpu/kmeans/kmeans_init.cuh @@ -9,17 +9,18 @@ #include -#include "KmMatrix/KmConfig.h" -#include "KmMatrix/KmMatrix.hpp" -#include "KmMatrix/utils.cuh" -#include "KmMatrix/Generator.hpp" -#include "KmMatrix/Generator.cuh" -#include "KmMatrix/GpuInfo.cuh" +#include "../matrix/KmMatrix/KmMatrix.hpp" +#include "../matrix/KmMatrix/Generator.hpp" +#include "../matrix/KmMatrix/Generator.cuh" +#include "../utils/GpuInfo.cuh" +#include "../utils/utils.cuh" constexpr double ESP = 1e-8; -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace kMeans { + +using namespace Matrix; namespace detail { @@ -202,7 +203,7 @@ struct KmeansLlInit : public KmeansInitBase { // FIXME: Make kmeans++ a derived class of KmeansInitBase -} // namespace Kmeans -} // namespace H2O4GPU +} // namespace kMeans +} // namespace h2o4gpu #endif // KMEANS_INIT_H_ \ No newline at end of file diff --git a/src/gpu/kmeans/kmeans_labels.h b/src/gpu/kmeans/kmeans_labels.h index ada7f286e..87b7b5ed9 100644 --- a/src/gpu/kmeans/kmeans_labels.h +++ b/src/gpu/kmeans/kmeans_labels.h @@ -9,9 +9,13 @@ #include #include #include -#include "kmeans_general.h" #include +#include "kmeans_general.h" +#include "../utils/utils.cuh" + +using namespace h2o4gpu; + inline void gpu_assert(cudaError_t code, const char *file, int line, bool abort=true) { if (code != cudaSuccess) { fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); @@ -23,20 +27,6 @@ inline void gpu_assert(cudaError_t code, const char *file, int line, bool abort= } } - -inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file, - int line) { - if (code != cudaSuccess) { - std::stringstream ss; - ss << file << "(" << line << ")"; - std::string file_and_line; - ss >> file_and_line; - thrust::system_error(code, thrust::cuda_category(), file_and_line); - } - - return code; -} - #ifdef CUBLAS_API_H_ // cuBLAS API errors static const char *cudaGetErrorEnum(cublasStatus_t error) @@ -72,23 +62,6 @@ static const char *cudaGetErrorEnum(cublasStatus_t error) } #endif -inline cublasStatus_t throw_on_cublas_error(cublasStatus_t code, const char *file, - int line) { - - - if (code != CUBLAS_STATUS_SUCCESS) { - fprintf(stderr,"cublas error: %s %s %d\n", cudaGetErrorEnum(code), file, line); - std::stringstream ss; - ss << file << "(" << line << ")"; - std::string file_and_line; - ss >> file_and_line; - thrust::system_error(code, thrust::cuda_category(), file_and_line); - } - - return code; -} - - extern cudaStream_t cuda_stream[MAX_NGPUS]; template diff --git a/src/gpu/kmeans/KmMatrix/Arith.cu b/src/gpu/matrix/KmMatrix/Arith.cu similarity index 94% rename from src/gpu/kmeans/KmMatrix/Arith.cu rename to src/gpu/matrix/KmMatrix/Arith.cu index 88bc9d920..2f9c4ccd5 100644 --- a/src/gpu/kmeans/KmMatrix/Arith.cu +++ b/src/gpu/matrix/KmMatrix/Arith.cu @@ -1,6 +1,8 @@ #include "Arith.hpp" -namespace H2O4GPU { -namespace KMeans { +#include "../../utils/GpuInfo.cuh" + +namespace h2o4gpu { +namespace Matrix { namespace kernel { @@ -121,7 +123,9 @@ KmMatrix ArgMinOp::argmin(KmMatrix& _val, KmMatrixDim _dim) { return _res; } else { // FIXME - M_ERROR("Not implemented"); + h2o4gpu_error("Not implemented"); + KmMatrix res; + return res; } } @@ -135,7 +139,9 @@ KmMatrix MinOp::min(KmMatrix& _val, KmMatrixDim _dim) { return _res; } else { // FIXME - M_ERROR("Not implemented"); + h2o4gpu_error("Not implemented"); + KmMatrix res; + return res; } } @@ -159,5 +165,5 @@ INSTANTIATE(double) INSTANTIATE(float) INSTANTIATE(int) -} // namespace KMenas -} // namespace H204GPU \ No newline at end of file +} // namespace Matrix +} // namespace h2o4gpu \ No newline at end of file diff --git a/src/gpu/kmeans/KmMatrix/Arith.hpp b/src/gpu/matrix/KmMatrix/Arith.hpp similarity index 90% rename from src/gpu/kmeans/KmMatrix/Arith.hpp rename to src/gpu/matrix/KmMatrix/Arith.hpp index 3ae493455..7760cd6aa 100644 --- a/src/gpu/kmeans/KmMatrix/Arith.hpp +++ b/src/gpu/matrix/KmMatrix/Arith.hpp @@ -3,10 +3,10 @@ #include "KmMatrix.hpp" #include "blas.cuh" -#include "utils.cuh" +#include "../../utils/utils.cuh" -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { // FIXME: Using struct for operations is just keeping the possibility of // creating an unified operations for KmMatrix. For example, let KmMatrix @@ -52,7 +52,7 @@ struct MinOp { KmMatrix min(KmMatrix& _val, KmMatrixDim _dim); }; -} // namespace KMenas -} // namespace H204GPU +} // namespace Matrix +} // namespace h2o4gpu #endif // M_ARITH_HPP_ diff --git a/src/gpu/kmeans/KmMatrix/Generator.cuh b/src/gpu/matrix/KmMatrix/Generator.cuh similarity index 84% rename from src/gpu/kmeans/KmMatrix/Generator.cuh rename to src/gpu/matrix/KmMatrix/Generator.cuh index 890729f3c..061edbe96 100644 --- a/src/gpu/kmeans/KmMatrix/Generator.cuh +++ b/src/gpu/matrix/KmMatrix/Generator.cuh @@ -10,11 +10,11 @@ #include "Generator.hpp" #include "KmMatrix.hpp" -#include "utils.cuh" +#include "../../utils/utils.cuh" -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { namespace kernel { // Split the definition to avoid multiple definition. @@ -45,9 +45,9 @@ struct UniformGenerator : public GeneratorBase { random_numbers_ = KmMatrix (1, size_); if (dev_states_ != nullptr) { - CUDA_CHECK(cudaFree(dev_states_)); + safe_cuda(cudaFree(dev_states_)); } - CUDA_CHECK(cudaMalloc((void **)&dev_states_, size_ * sizeof(curandState))); + safe_cuda(cudaMalloc((void **)&dev_states_, size_ * sizeof(curandState))); kernel::setup_random_states<<>>( seed_, dev_states_, size_); } @@ -60,7 +60,7 @@ struct UniformGenerator : public GeneratorBase { UniformGenerator (size_t _size, int _seed) { if (_size == 0) { - M_ERROR("Zero size for generate is not allowed."); + h2o4gpu_error("Zero size for generate is not allowed."); } initialize(_size); } @@ -70,7 +70,7 @@ struct UniformGenerator : public GeneratorBase { ~UniformGenerator () { if (dev_states_ != nullptr) { - CUDA_CHECK(cudaFree(dev_states_)); + safe_cuda(cudaFree(dev_states_)); } } @@ -87,7 +87,7 @@ struct UniformGenerator : public GeneratorBase { KmMatrix generate(size_t _size) override { if (_size == 0) { - M_ERROR("Zero size for generate is not allowed."); + h2o4gpu_error("Zero size for generate is not allowed."); } if (_size != size_) { initialize(_size); @@ -96,5 +96,5 @@ struct UniformGenerator : public GeneratorBase { } }; -} // H2O4GPU -} // KMeans \ No newline at end of file +} // namespace h2o4gpu +} // namespace Matrix \ No newline at end of file diff --git a/src/gpu/kmeans/KmMatrix/Generator.hpp b/src/gpu/matrix/KmMatrix/Generator.hpp similarity index 80% rename from src/gpu/kmeans/KmMatrix/Generator.hpp rename to src/gpu/matrix/KmMatrix/Generator.hpp index 7200b85ff..9ef1ae9ac 100644 --- a/src/gpu/kmeans/KmMatrix/Generator.hpp +++ b/src/gpu/matrix/KmMatrix/Generator.hpp @@ -8,8 +8,8 @@ #include "KmMatrix.hpp" -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { template class GeneratorBase { @@ -18,8 +18,8 @@ class GeneratorBase { virtual KmMatrix generate(size_t _size) = 0; }; -} -} +} // namespace Matrix +} // namespace h2o4gpu #endif // GENERATOR_HPP_ diff --git a/src/gpu/kmeans/KmMatrix/GeneratorKernels.cu b/src/gpu/matrix/KmMatrix/GeneratorKernels.cu similarity index 95% rename from src/gpu/kmeans/KmMatrix/GeneratorKernels.cu rename to src/gpu/matrix/KmMatrix/GeneratorKernels.cu index 28b521f26..e81885f12 100644 --- a/src/gpu/kmeans/KmMatrix/GeneratorKernels.cu +++ b/src/gpu/matrix/KmMatrix/GeneratorKernels.cu @@ -6,8 +6,8 @@ #include #include -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { namespace kernel { __global__ void setup_random_states(int _seed, curandState *_state, @@ -59,5 +59,5 @@ __global__ void generate_uniform_kernel(int *_res, } } // namespace kernel -} // namespace KMeans -} // namespace H2O4GPU \ No newline at end of file +} // namespace Matrix +} // namespace h2o4gpu \ No newline at end of file diff --git a/src/gpu/kmeans/KmMatrix/KmMatrix.cpp b/src/gpu/matrix/KmMatrix/KmMatrix.cpp similarity index 95% rename from src/gpu/kmeans/KmMatrix/KmMatrix.cpp rename to src/gpu/matrix/KmMatrix/KmMatrix.cpp index d350bd7e2..67d3f3207 100644 --- a/src/gpu/kmeans/KmMatrix/KmMatrix.cpp +++ b/src/gpu/matrix/KmMatrix/KmMatrix.cpp @@ -4,13 +4,13 @@ */ #include "KmMatrix.hpp" -#include "KmConfig.h" + #if USE_CUDA() #include "KmMatrixCuda.cuh" #endif -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { // ============================== // KmMatrixImpl implementation @@ -222,7 +222,7 @@ KmMatrixProxy KmMatrix::row(size_t idx, bool dev_mem) { template KmMatrixProxy KmMatrix::col(size_t idx) { - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); return KmMatrixProxy(*this, 0, 0, 0); } @@ -236,14 +236,14 @@ KmMatrix KmMatrix::rows(KmMatrix& _index) { } res = impls[(int)Backend::CUDADense]->rows(_index); } else { - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); } return res; } template KmMatrix KmMatrix::cols(KmMatrix& _index) { - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); KmMatrix res; return res; } @@ -254,7 +254,7 @@ bool KmMatrix::operator==(KmMatrix& _rhs) { bool res = impls[(int)Backend::CUDADense]->equal(_rhs); return res; } else { - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); return false; } } @@ -266,17 +266,17 @@ KmMatrix KmMatrix::stack(KmMatrix &_second, if (_dim == KmMatrixDim::ROW) { if (cols() != _second.cols()) { - M_ERROR("Columns of first is not equal to second."); + h2o4gpu_error("Columns of first is not equal to second."); } if (backend_ == Backend::CUDADense) { res = impls[(int)Backend::CUDADense]->stack(_second, _dim); } else { - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); } } else { - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); } return res; @@ -342,7 +342,7 @@ KmMatrix stack(KmMatrix& _first, KmMatrix& _second, template KmMatrix KmMatrix::cols(KmMatrix& _index); \ template bool KmMatrix::operator==(KmMatrix &_rhs); \ template KmMatrix KmMatrix::stack(KmMatrix &_second, \ - H2O4GPU::KMeans::KmMatrixDim _dim); \ + KmMatrixDim _dim); \ /* Helper functions */ \ template std::ostream& operator<<(std::ostream& os, KmMatrix& m); \ template KmMatrix stack(KmMatrix& _first, KmMatrix& _second, \ @@ -354,5 +354,5 @@ INSTANTIATE(double) INSTANTIATE(int) #undef INSTANTIATE -} // namespace KMeans -} // namepsace H2O4GPU +} // namespace Matrix +} // namepsace h2o4gpu diff --git a/src/gpu/kmeans/KmMatrix/KmMatrix.hpp b/src/gpu/matrix/KmMatrix/KmMatrix.hpp similarity index 94% rename from src/gpu/kmeans/KmMatrix/KmMatrix.hpp rename to src/gpu/matrix/KmMatrix/KmMatrix.hpp index 31ad2dcd4..dbeb1943e 100644 --- a/src/gpu/kmeans/KmMatrix/KmMatrix.hpp +++ b/src/gpu/matrix/KmMatrix/KmMatrix.hpp @@ -11,15 +11,14 @@ #include #include #include - -#include "KmConfig.h" +#include "../../../common/utils.h" #if USE_CUDA() #include "KmMatrixCuda.cuh" #endif -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { template class KmMatrixProxy; @@ -53,13 +52,13 @@ struct kParam { cols = _other.cols; ptr = _other.ptr; } - M_HOSTDEV void operator=(const kParam& _other) { + HG_HOSTDEV void operator=(const kParam& _other) { rows = _other.rows; cols = _other.cols; ptr = _other.ptr; } - M_HOSTDEV size_t size() const { + HG_HOSTDEV size_t size() const { return rows * cols; } }; @@ -193,7 +192,7 @@ struct KmMatrixSizeError: public std::runtime_error {} }; -} // namespace KMeans -} // namespace H2O4GPU +} // namespace Matrix +} // namespace h2o4gpu #endif diff --git a/src/gpu/kmeans/KmMatrix/KmMatrixCuda.cu b/src/gpu/matrix/KmMatrix/KmMatrixCuda.cu similarity index 96% rename from src/gpu/kmeans/KmMatrix/KmMatrixCuda.cu rename to src/gpu/matrix/KmMatrix/KmMatrixCuda.cu index a2d9a3c29..6b6052ce1 100644 --- a/src/gpu/kmeans/KmMatrix/KmMatrixCuda.cu +++ b/src/gpu/matrix/KmMatrix/KmMatrixCuda.cu @@ -12,8 +12,8 @@ #include "KmMatrixCuda.cuh" #include "KmMatrix.hpp" -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { template CudaKmMatrixImpl::CudaKmMatrixImpl(KmMatrix * _par) : @@ -173,7 +173,7 @@ KmMatrix CudaKmMatrixImpl::stack(KmMatrix& _second, KmMatrixDim _dim) { if (_dim == KmMatrixDim::ROW) { if (KmMatrixImpl::matrix_->cols() != _second.cols()) { - M_ERROR("Columns of first is not equal to second."); + h2o4gpu_error("Columns of first is not equal to second."); } host_to_device(); @@ -195,7 +195,9 @@ KmMatrix CudaKmMatrixImpl::stack(KmMatrix& _second, return res; } else { // FIXME - M_ERROR("Not implemented."); + h2o4gpu_error("Not implemented."); + KmMatrix res; + return res; } } @@ -229,5 +231,6 @@ INSTANTIATE(double) INSTANTIATE(int) #undef INSTANTIATE -} // namespace H204GPU -} // namespace Array + +} // namespace Matrix +} // namespace h2o4gpu diff --git a/src/gpu/kmeans/KmMatrix/KmMatrixCuda.cuh b/src/gpu/matrix/KmMatrix/KmMatrixCuda.cuh similarity index 95% rename from src/gpu/kmeans/KmMatrix/KmMatrixCuda.cuh rename to src/gpu/matrix/KmMatrix/KmMatrixCuda.cuh index 50500f20e..02f8f6adc 100644 --- a/src/gpu/kmeans/KmMatrix/KmMatrixCuda.cuh +++ b/src/gpu/matrix/KmMatrix/KmMatrixCuda.cuh @@ -10,8 +10,8 @@ #include "thrust/device_vector.h" #include -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { template class KmMatrix; @@ -81,7 +81,7 @@ class CudaKmMatrixImpl : public KmMatrixImpl { virtual bool on_device() const override; }; -} // MkMatrix -} // H204GPU +} // namespace Matrix +} // namespace h2o4gpu #endif diff --git a/src/gpu/kmeans/KmMatrix/KmMatrixProxy.cpp b/src/gpu/matrix/KmMatrix/KmMatrixProxy.cpp similarity index 95% rename from src/gpu/kmeans/KmMatrix/KmMatrixProxy.cpp rename to src/gpu/matrix/KmMatrix/KmMatrixProxy.cpp index 0bdfa9ebf..9e1d5ffd9 100644 --- a/src/gpu/kmeans/KmMatrix/KmMatrixProxy.cpp +++ b/src/gpu/matrix/KmMatrix/KmMatrixProxy.cpp @@ -5,8 +5,8 @@ #include "KmMatrix.hpp" -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { template KmMatrixProxy::KmMatrixProxy(KmMatrix& _other, @@ -64,5 +64,5 @@ INSTANTIATE(float) INSTANTIATE(double) INSTANTIATE(int) -} -} +} // namespace Matrix +} // namespace h2o4gpu diff --git a/src/gpu/kmeans/KmMatrix/blas.cuh b/src/gpu/matrix/KmMatrix/blas.cuh similarity index 94% rename from src/gpu/kmeans/KmMatrix/blas.cuh rename to src/gpu/matrix/KmMatrix/blas.cuh index 20cf73151..50b69898b 100644 --- a/src/gpu/kmeans/KmMatrix/blas.cuh +++ b/src/gpu/matrix/KmMatrix/blas.cuh @@ -7,12 +7,12 @@ #define KM_BLAS_CUH_ #include -#include "KmConfig.h" +#include "../../utils/utils.cuh" // C++ Wrappers for cublas -namespace H2O4GPU { -namespace KMeans { +namespace h2o4gpu { +namespace Matrix { namespace Blas { // LEVEL 1 @@ -20,7 +20,7 @@ inline void axpy(cublasHandle_t handle, int n, const double *alpha, const double *x, int incx, double *y, int incy) { - CUBLAS_CHECK(cublasDaxpy(handle, n, + safe_cublas(cublasDaxpy(handle, n, alpha, x, incx, y, incy));} @@ -29,7 +29,7 @@ inline void axpy(cublasHandle_t handle, int n, const float *alpha, const float *x, int incx, float *y, int incy) { - CUBLAS_CHECK(cublasSaxpy(handle, n, + safe_cublas(cublasSaxpy(handle, n, alpha, x, incx, y, incy));} @@ -38,7 +38,7 @@ inline void axpy(cublasHandle_t handle, int n, const int *alpha, const int *x, int incx, int *y, int incy) { - CUBLAS_CHECK(cublasSaxpy(handle, n, + safe_cublas(cublasSaxpy(handle, n, (const float *)alpha, (const float *)x, incx, (float *)y, incy));} @@ -57,7 +57,7 @@ inline void gemm(cublasHandle_t handle, const float *beta, /* host or device pointer */ float *C, int ldc) { - CUBLAS_CHECK(cublasSgemm(handle, + safe_cublas(cublasSgemm(handle, transa, transb, m, n, k, alpha, /* host or device pointer */ @@ -80,7 +80,7 @@ inline void gemm(cublasHandle_t handle, const double *beta, /* host or device pointer */ double *C, int ldc) { - CUBLAS_CHECK(cublasDgemm(handle, + safe_cublas(cublasDgemm(handle, transa, transb, m, @@ -109,7 +109,7 @@ inline void gemm(cublasHandle_t handle, const int *beta, /* host or device pointer */ int *C, int ldc) { - CUBLAS_CHECK(cublasSgemm(handle, + safe_cublas(cublasSgemm(handle, transa, transb, m, n, k, (const float*)alpha, /* host or device pointer */ @@ -129,7 +129,7 @@ inline void gemm_batched(cublasHandle_t handle, const double *beta, double *Carray[], int ldc, int batchCount) { - CUBLAS_CHECK(cublasDgemmBatched(handle, + safe_cublas(cublasDgemmBatched(handle, transa, transb, m, n, k, @@ -151,7 +151,7 @@ inline void gemm_batched(cublasHandle_t handle, const float *beta, float *Carray[], int ldc, int batchCount) { - CUBLAS_CHECK(cublasSgemmBatched(handle, + safe_cublas(cublasSgemmBatched(handle, transa, transb, m, n, k, @@ -173,7 +173,7 @@ inline void gemm_batched(cublasHandle_t handle, const int *beta, float *Carray[], int ldc, int batchCount) { - CUBLAS_CHECK(cublasSgemmBatched(handle, + safe_cublas(cublasSgemmBatched(handle, transa, transb, m, n, k, @@ -196,7 +196,7 @@ inline void gemm_strided_batched( const double* beta, double* C, int ldC, int strideC, int batchCount) { - CUBLAS_CHECK(cublasDgemmStridedBatched(handle, + safe_cublas(cublasDgemmStridedBatched(handle, transA, transB, M, N, K, @@ -221,7 +221,7 @@ inline void gemm_strided_batched( const float* beta, float* C, int ldC, int strideC, int batchCount) { - CUBLAS_CHECK(cublasSgemmStridedBatched(handle, + safe_cublas(cublasSgemmStridedBatched(handle, transA, transB, M, N, K, @@ -246,7 +246,7 @@ inline void gemm_strided_batched( const int* beta, int* C, int ldC, int strideC, int batchCount) { - CUBLAS_CHECK(cublasSgemmStridedBatched(handle, + safe_cublas(cublasSgemmStridedBatched(handle, transA, transB, M, N, K, @@ -262,7 +262,7 @@ inline void gemm_strided_batched( } } // Blas -} // KMeans -} // H2O4GPU +} // Matrix +} // h2o4gpu #endif // KM_BLAS_CUH_ \ No newline at end of file diff --git a/src/gpu/kmeans/KmMatrix/GpuInfo.cuh b/src/gpu/utils/GpuInfo.cuh similarity index 88% rename from src/gpu/kmeans/KmMatrix/GpuInfo.cuh rename to src/gpu/utils/GpuInfo.cuh index 22fe53e8d..0a57644f7 100644 --- a/src/gpu/kmeans/KmMatrix/GpuInfo.cuh +++ b/src/gpu/utils/GpuInfo.cuh @@ -6,13 +6,14 @@ #ifndef GPU_INFO_HPP_ #define GPU_INFO_HPP_ -#include "KmConfig.h" +#include "utils.cuh" #include #include #include +namespace h2o4gpu { // Singleton class storing gpu info. // Call GpuInfo::ins() to use the class; class GpuInfo { @@ -23,19 +24,19 @@ class GpuInfo { public: GpuInfo () { - CUDA_CHECK(cudaGetDeviceCount(&n_gpu_)); + safe_cuda(cudaGetDeviceCount(&n_gpu_)); n_sm_ = (int*) malloc (n_gpu_); handles_ = (cublasHandle_t*) malloc (n_gpu_); for (int i = 0; i < n_gpu_; ++i) { cudaDeviceGetAttribute(&n_sm_[i], cudaDevAttrMultiProcessorCount, i); - CUBLAS_CHECK(cublasCreate(&handles_[i])); + safe_cublas(cublasCreate(&handles_[i])); } } ~GpuInfo () { free (n_sm_); for (int i = 0; i < n_gpu_; ++i) { - CUBLAS_CHECK(cublasDestroy(handles_[i])); + safe_cublas(cublasDestroy(handles_[i])); } free (handles_); } @@ -74,4 +75,6 @@ class GpuInfo { } }; +} // namespace h2o4gpu + #endif // GPU_INFO_HPP_ diff --git a/src/gpu/utils/utils.cuh b/src/gpu/utils/utils.cuh index 3f35f375d..fed9e46e1 100644 --- a/src/gpu/utils/utils.cuh +++ b/src/gpu/utils/utils.cuh @@ -9,29 +9,10 @@ #include #include +#include "../../common/utils.h" + namespace h2o4gpu { -#define h2o4gpu_error(x) error(x, __FILE__, __LINE__); - - inline void error(const char* e, const char* file, int line) - { - std::stringstream ss; - ss << e << " - " << file << "(" << line << ")"; - //throw error_text; - std::cerr << ss.str() << std::endl; - exit(-1); - } - -#define h2o4gpu_check(condition, msg) check(condition, msg, __FILE__, __LINE__); - - inline void check(bool val, const char* e, const char* file, int line) - { - if (!val) - { - error(e, file, line); - } - } - #define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__) @@ -366,4 +347,56 @@ namespace h2o4gpu return idx * col_size; }); } + +HG_DEVINLINE size_t global_thread_idx () { + return threadIdx.x + blockIdx.x * blockDim.x; +} + +HG_DEVINLINE size_t global_thread_idy () { + return threadIdx.y + blockIdx.y * blockDim.y; +} + +HG_DEVINLINE size_t grid_stride_x () { + return blockDim.x * gridDim.x; +} + +HG_DEVINLINE size_t grid_stride_y () { + return blockDim.y * gridDim.y; +} + +template +T1 HG_HOSTDEVINLINE div_roundup(const T1 a, const T2 b) { + return static_cast(ceil(static_cast(a) / b)); +} + + +// Work around for shared memory +// https://stackoverflow.com/questions/20497209/getting-cuda-error-declaration-is-incompatible-with-previous-variable-name +template +struct KernelSharedMem; + +template <> +struct KernelSharedMem { + __device__ float * ptr() { + extern __shared__ __align__(sizeof(float)) float s_float[]; + return s_float; + } +}; + +template <> +struct KernelSharedMem { + __device__ double * ptr() { + extern __shared__ __align__(sizeof(double)) double s_double[]; + return s_double; + } +}; + +template <> +struct KernelSharedMem { + __device__ int * ptr() { + extern __shared__ __align__(sizeof(int)) int s_int[]; + return s_int; + } +}; + } diff --git a/tests/cpp/gpu/KmMatrix/test_arith.cu b/tests/cpp/gpu/KmMatrix/test_arith.cu index fda0db018..81bf4101c 100644 --- a/tests/cpp/gpu/KmMatrix/test_arith.cu +++ b/tests/cpp/gpu/KmMatrix/test_arith.cu @@ -2,12 +2,12 @@ #include #include -#include "../../../../src/gpu/kmeans/KmMatrix/KmMatrix.hpp" -#include "../../../../src/gpu/kmeans/KmMatrix/Arith.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/KmMatrix.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/Arith.hpp" #include -using namespace H2O4GPU::KMeans; +using namespace h2o4gpu::Matrix; constexpr float esp = 0.001f; diff --git a/tests/cpp/gpu/KmMatrix/test_matrix.cu b/tests/cpp/gpu/KmMatrix/test_matrix.cu index 3b36b35fe..e1aae97f1 100644 --- a/tests/cpp/gpu/KmMatrix/test_matrix.cu +++ b/tests/cpp/gpu/KmMatrix/test_matrix.cu @@ -1,7 +1,7 @@ #include #include -#include "../../../../src/gpu/kmeans/KmMatrix/KmMatrix.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/KmMatrix.hpp" #include // r --gtest_filter=KmMatrix.KmMatrixEqual @@ -10,7 +10,7 @@ TEST(KmMatrix, KmMatrixEqual) { for (size_t i = 0; i < 2048 * 1024; ++i) { vec[i] = i; } - H2O4GPU::KMeans::KmMatrix mat (vec, 2048, 1024); + h2o4gpu::Matrix::KmMatrix mat (vec, 2048, 1024); ASSERT_TRUE (mat == mat); @@ -18,7 +18,7 @@ TEST(KmMatrix, KmMatrixEqual) { for (size_t i = 0; i < 2048 * 1024; ++i) { vec2[i] = i + i; } - H2O4GPU::KMeans::KmMatrix mat2 (vec2, 2048, 1024); + h2o4gpu::Matrix::KmMatrix mat2 (vec2, 2048, 1024); ASSERT_FALSE(mat == mat2); } @@ -29,9 +29,9 @@ TEST(KmMatrix, KmMatrixAssig) { vec[i] = i; } - H2O4GPU::KMeans::KmMatrix mat0 (vec, 2048, 1024); - H2O4GPU::KMeans::KmMatrix mat1 = mat0; - H2O4GPU::KMeans::KmMatrix mat2; + h2o4gpu::Matrix::KmMatrix mat0 (vec, 2048, 1024); + h2o4gpu::Matrix::KmMatrix mat1 = mat0; + h2o4gpu::Matrix::KmMatrix mat2; mat2 = mat0; @@ -44,16 +44,16 @@ TEST(KmMatrix, KmMatrixRows) { for (size_t i = 0; i < 12 * 16; ++i) { vec[i] = i; } - H2O4GPU::KMeans::KmMatrix mat (vec, 12, 16); + h2o4gpu::Matrix::KmMatrix mat (vec, 12, 16); thrust::host_vector h_index (4, 1); h_index[0] = 0; h_index[1] = 2; h_index[2] = 9; h_index[3] = 1; - H2O4GPU::KMeans::KmMatrix index (h_index, 4, 1); + h2o4gpu::Matrix::KmMatrix index (h_index, 4, 1); - H2O4GPU::KMeans::KmMatrix rows = mat.rows(index); + h2o4gpu::Matrix::KmMatrix rows = mat.rows(index); thrust::host_vector h_sol (4 * 16); for (size_t i = 0; i < 16; ++i) { @@ -69,7 +69,7 @@ TEST(KmMatrix, KmMatrixRows) { h_sol[i] = vec[16 * 1 + (i - 48)]; } - H2O4GPU::KMeans::KmMatrix sol (h_sol, 4, 16); + h2o4gpu::Matrix::KmMatrix sol (h_sol, 4, 16); ASSERT_TRUE(rows == sol); } @@ -77,13 +77,13 @@ TEST(KmMatrix, KmMatrixRows) { TEST(KmMatrix, SizeError) { thrust::host_vector vec (12 * 16); ASSERT_THROW( - H2O4GPU::KMeans::KmMatrix mat (vec, 12, 4), + h2o4gpu::Matrix::KmMatrix mat (vec, 12, 4), std::runtime_error); } TEST(KmMatrix, KmMatrixUtils) { thrust::host_vector vec (12 * 16); - H2O4GPU::KMeans::KmMatrix mat (vec, 12, 16); + h2o4gpu::Matrix::KmMatrix mat (vec, 12, 16); ASSERT_EQ(mat.rows(), 12); ASSERT_EQ(mat.cols(), 16); @@ -93,9 +93,9 @@ TEST(KmMatrix, KmMatrixUtils) { TEST(KmMatrix, KmMatrixKparam) { thrust::host_vector vec (12 * 16); thrust::fill(vec.begin(), vec.end(), 1); - H2O4GPU::KMeans::KmMatrix mat (vec, 12, 16); + h2o4gpu::Matrix::KmMatrix mat (vec, 12, 16); - H2O4GPU::KMeans::kParam param = mat.k_param(); + h2o4gpu::Matrix::kParam param = mat.k_param(); ASSERT_EQ(param.ptr, mat.dev_ptr()); ASSERT_EQ(param.rows, 12); ASSERT_EQ(param.cols, 16); @@ -110,11 +110,11 @@ TEST(KmMatrix, KmMatrixCycle) { // Tweak this one to see if memory grows, there should be a better way to // test memory leak. size_t iters = std::pow(16, 1); - H2O4GPU::KMeans::KmMatrix mat0 (vec, rows, cols); + h2o4gpu::Matrix::KmMatrix mat0 (vec, rows, cols); mat0.dev_ptr(); for (size_t i = 0; i < iters; ++i) { - H2O4GPU::KMeans::KmMatrix mat1 = mat0; - H2O4GPU::KMeans::KmMatrix mat2 = mat1; + h2o4gpu::Matrix::KmMatrix mat1 = mat0; + h2o4gpu::Matrix::KmMatrix mat2 = mat1; mat0 = mat2; } } @@ -126,16 +126,16 @@ TEST(KmMatrix, Stack) { for (size_t i = 0; i < rows * cols; ++i) { vec[i] = i; } - H2O4GPU::KMeans::KmMatrix mat(vec, rows, cols); + h2o4gpu::Matrix::KmMatrix mat(vec, rows, cols); thrust::host_vector vec1 (rows * cols); for (size_t i = rows * cols; i < 2 * rows * cols; ++i) { vec1[i - rows * cols] = i; } - H2O4GPU::KMeans::KmMatrix mat1(vec1, rows, cols); + h2o4gpu::Matrix::KmMatrix mat1(vec1, rows, cols); - H2O4GPU::KMeans::KmMatrix calculated = - H2O4GPU::KMeans::stack(mat, mat1, H2O4GPU::KMeans::KmMatrixDim::ROW); + h2o4gpu::Matrix::KmMatrix calculated = + h2o4gpu::Matrix::stack(mat, mat1, h2o4gpu::Matrix::KmMatrixDim::ROW); thrust::host_vector res (2 * rows * cols); for (size_t i = 0; i < rows * cols; ++i) { @@ -145,7 +145,7 @@ TEST(KmMatrix, Stack) { res[i] = i; } - H2O4GPU::KMeans::KmMatrix res_mat (res, 2 * rows, cols); + h2o4gpu::Matrix::KmMatrix res_mat (res, 2 * rows, cols); ASSERT_TRUE(calculated == res_mat); } diff --git a/tests/cpp/gpu/KmMatrix/test_proxy.cu b/tests/cpp/gpu/KmMatrix/test_proxy.cu index 732d27eef..f7574596f 100644 --- a/tests/cpp/gpu/KmMatrix/test_proxy.cu +++ b/tests/cpp/gpu/KmMatrix/test_proxy.cu @@ -1,6 +1,6 @@ #include #include -#include "../../../../src/gpu/kmeans/KmMatrix/KmMatrix.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/KmMatrix.hpp" // r --gtest_filter=KmMatrix.KmMatrixHostProxy TEST(KmMatrix, KmMatrixProxyHostEqual) { @@ -10,9 +10,9 @@ TEST(KmMatrix, KmMatrixProxyHostEqual) { vec[i] = i; } - H2O4GPU::KMeans::KmMatrix mat (vec, rows, cols); + h2o4gpu::Matrix::KmMatrix mat (vec, rows, cols); - H2O4GPU::KMeans::KmMatrix row = mat.row(1); + h2o4gpu::Matrix::KmMatrix row = mat.row(1); thrust::host_vector res (cols); @@ -20,7 +20,7 @@ TEST(KmMatrix, KmMatrixProxyHostEqual) { res[i] = v; } - H2O4GPU::KMeans::KmMatrix res_mat (res, 1, cols); + h2o4gpu::Matrix::KmMatrix res_mat (res, 1, cols); ASSERT_TRUE(res_mat == row); } @@ -34,12 +34,12 @@ TEST(KmMatrix, KmMatrixProxyDevEqual) { vec[i] = i; } - H2O4GPU::KMeans::KmMatrix mat (vec, rows, cols); + h2o4gpu::Matrix::KmMatrix mat (vec, rows, cols); mat.set_name ("mat"); mat.dev_ptr(); - H2O4GPU::KMeans::KmMatrix row = mat.row(1); + h2o4gpu::Matrix::KmMatrix row = mat.row(1); row.set_name ("row"); thrust::host_vector res (cols); @@ -48,7 +48,7 @@ TEST(KmMatrix, KmMatrixProxyDevEqual) { res[i] = v; } - H2O4GPU::KMeans::KmMatrix res_mat (res, 1, cols); + h2o4gpu::Matrix::KmMatrix res_mat (res, 1, cols); res_mat.set_name("res"); ASSERT_TRUE(res_mat == row); diff --git a/tests/cpp/gpu/kmeans/test_kmeans_init.cu b/tests/cpp/gpu/kmeans/test_kmeans_init.cu index d15b6d4d7..32efb79d1 100644 --- a/tests/cpp/gpu/kmeans/test_kmeans_init.cu +++ b/tests/cpp/gpu/kmeans/test_kmeans_init.cu @@ -5,22 +5,26 @@ #include -#include "../../../../src/gpu/kmeans/KmMatrix/KmMatrix.hpp" -#include "../../../../src/gpu/kmeans/KmMatrix/Generator.hpp" -#include "../../../../src/gpu/kmeans/KmMatrix/Arith.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/KmMatrix.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/Generator.hpp" +#include "../../../../src/gpu/matrix/KmMatrix/Arith.hpp" #include "../../../../src/gpu/kmeans/kmeans_init.cuh" +#include "../../../../src/common/utils.h" #include #include #include -using namespace H2O4GPU::KMeans; +using namespace h2o4gpu::kMeans; +using namespace h2o4gpu::Matrix; template struct GeneratorMock : GeneratorBase { public: KmMatrix generate() override { - M_ERROR("Not implemented"); + h2o4gpu_error("Not implemented"); + KmMatrix res; + return res; } KmMatrix generate(size_t _size) override { @@ -155,7 +159,7 @@ TEST(KmeansLL, KmeansLLInit) { h_data[i] = i - 4; } - H2O4GPU::KMeans::KmMatrix data (h_data, 6, 5); + KmMatrix data (h_data, 6, 5); auto res = kmeans_ll_init(data, 2);