Restructure the KmMatrix and shared GPU utility sources.

* Put KmMatrix under gpu/matrix.
* Use shared utils for both kmeans and KmMatrix.
* Rename the namespaces (H2O4GPU::KMeans -> h2o4gpu::kMeans).
h2oai · Jul 25, 2018 · 6c8ff4e · 6c8ff4e
1 parent 450051e
commit 6c8ff4e
Show file tree

Hide file tree

Showing 26 changed files with 263 additions and 358 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -94,8 +94,7 @@ if(USE_CUDA)
   FILE(GLOB_RECURSE GPU_SOURCES
     src/*.cu
     src/*.cuh
-    src/gpu/kmeans/KmMatrix/*.cpp
-    src/gpu/kmeans/KmMatrix/*.hpp
+    src/gpu/matrix/*.cpp
     src/common/*.cpp
     src/common/*.h)
 

diff --git a/src/common/utils.h b/src/common/utils.h
@@ -4,8 +4,14 @@
  */
 #pragma once
 #include <vector>
+
+#include <iostream>
+#include <sstream>
+
 #include "cblas/cblas.h"
 
+#define USE_CUDA() 1
+
 template<typename T>
 void self_dot(std::vector<T> array_in, int n, int dim,
               std::vector<T>& dots);
@@ -19,3 +25,30 @@ void compute_distances(std::vector<float> data_in,
                        std::vector<float> centroids_in,
                        std::vector<float> &pairwise_distances,
                        int n, int dim, int k);
+
+// Matrix host dev
+#define HG_HOSTDEV       __host__   __device__
+#define HG_DEV           __device__
+#define HG_DEVINLINE     __device__ __forceinline__
+#define HG_HOSTDEVINLINE __host__   __device__      __forceinline__
+
+#define h2o4gpu_error(x) error(x, __FILE__, __LINE__);
+
+inline void error(const char* e, const char* file, int line)
+{
+  std::stringstream ss;
+  ss << e << " - " << file << "(" << line << ")";
+  //throw error_text;
+  std::cerr << ss.str() << std::endl;
+  exit(-1);
+}
+
+#define h2o4gpu_check(condition, msg) check(condition, msg, __FILE__, __LINE__);
+
+inline void check(bool val, const char* e, const char* file, int line)
+{
+  if (!val)
+  {
+    error(e, file, line);
+  }
+}
diff --git a/src/gpu/kmeans/KmMatrix/KmConfig.h b/src/gpu/kmeans/KmMatrix/KmConfig.h
diff --git a/src/gpu/kmeans/KmMatrix/utils.cuh b/src/gpu/kmeans/KmMatrix/utils.cuh
diff --git a/src/gpu/kmeans/kmeans_general.h b/src/gpu/kmeans/kmeans_general.h
@@ -4,6 +4,7 @@
  */
 #pragma once
 #include "../../common/logger.h"
+#include "../utils/utils.cuh"
 #include "stdio.h"
 #define MAX_NGPUS 16
 
@@ -13,8 +14,6 @@
 
 // TODO(pseudotensor): Avoid throw for python exception handling.  Need to avoid all exit's and return exit code all the way back.
 #define gpuErrchk(ans) { gpu_assert((ans), __FILE__, __LINE__); }
-#define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__);
-#define safe_cublas(ans) throw_on_cublas_error((ans), __FILE__, __LINE__);
 
 #define CUDACHECK(cmd) do {                           \
     cudaError_t e = cmd;                              \

diff --git a/src/gpu/kmeans/kmeans_h2o4gpu.cu b/src/gpu/kmeans/kmeans_h2o4gpu.cu
@@ -774,9 +774,9 @@ int kmeans_fit(int verbose, int seed, int gpu_idtry, int n_gputry,
             thrust::device,
             data[i]->begin(), data[i]->end(), h_init_data.begin());
       }
-      H2O4GPU::KMeans::KmMatrix<T> init_data(h_init_data, rows, cols);
-      H2O4GPU::KMeans::KmMatrix<T> final_centroids_matrix =
-          H2O4GPU::KMeans::KmeansLlInit<T>(seed, 1.5)(init_data, k);
+      h2o4gpu::kMeans::KmMatrix<T> init_data(h_init_data, rows, cols);
+      h2o4gpu::kMeans::KmMatrix<T> final_centroids_matrix =
+          h2o4gpu::kMeans::KmeansLlInit<T>(seed, 1.5)(init_data, k);
       thrust::host_vector<T> final_centroids (final_centroids_matrix.size());
       thrust::copy(
           final_centroids_matrix.dev_ptr(),

diff --git a/src/gpu/kmeans/kmeans_init.cu b/src/gpu/kmeans/kmeans_init.cu
@@ -15,14 +15,16 @@
 
 #include "kmeans_init.cuh"
 
-#include "KmMatrix/KmMatrix.hpp"
-#include "KmMatrix/Arith.hpp"
-#include "KmMatrix/utils.cuh"
-#include "KmMatrix/GpuInfo.cuh"
-#include "KmMatrix/blas.cuh"
+#include "../matrix/KmMatrix/KmMatrix.hpp"
+#include "../matrix/KmMatrix/Arith.hpp"
+#include "../matrix/KmMatrix/blas.cuh"
+#include "../utils/utils.cuh"
+#include "../utils/GpuInfo.cuh"
 
-namespace H2O4GPU {
-namespace KMeans {
+namespace h2o4gpu {
+namespace kMeans {
+
+using namespace Matrix;
 
 namespace kernel {
 // X^2 + Y^2, here only calculates the + operation.
@@ -116,7 +118,7 @@ KmMatrix<T> PairWiseDistanceOp<T>::operator()(KmMatrix<T>& _data,
           data_dot_.k_param(),
           centroids_dot_.k_param());
 
-  CUDA_CHECK(cudaGetLastError());
+  safe_cuda(cudaGetLastError());
 
   cublasHandle_t handle = GpuInfo::ins().cublas_handle();
 
@@ -191,7 +193,7 @@ KmMatrix<T> GreedyRecluster<T>::recluster(KmMatrix<T>& _centroids, size_t _k) {
   void *d_temp_storage = NULL;
 
   // determine the temp_storage_bytes
-  CUDA_CHECK(cub::DeviceHistogram::HistogramEven(
+  safe_cuda(cub::DeviceHistogram::HistogramEven(
       d_temp_storage, temp_storage_bytes,
       min_indices.dev_ptr(),
       weights.dev_ptr(),
@@ -200,16 +202,16 @@ KmMatrix<T> GreedyRecluster<T>::recluster(KmMatrix<T>& _centroids, size_t _k) {
       (T)min_indices.rows(),
       (int)_centroids.rows()));
 
-  CUDA_CHECK(cudaMalloc((void**)&d_temp_storage, temp_storage_bytes));
-  CUDA_CHECK(cub::DeviceHistogram::HistogramEven(
+  safe_cuda(cudaMalloc((void**)&d_temp_storage, temp_storage_bytes));
+  safe_cuda(cub::DeviceHistogram::HistogramEven(
       d_temp_storage, temp_storage_bytes,
       min_indices.dev_ptr(),    // d_samples
       weights.dev_ptr(),        // d_histogram
       min_indices.rows() + 1,   // num_levels
       (T)0.0,                   // lower_level
       (T)min_indices.rows(),    // upper_level
       (int)_centroids.rows())); // num_samples
-  CUDA_CHECK(cudaFree(d_temp_storage));
+  safe_cuda(cudaFree(d_temp_storage));
 
   // Sort the indices by weights in ascending order, then use those at front
   // as result.
@@ -352,7 +354,7 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
         "k must be less than or equal to the number of data points"
         ", k: %lu, data points: %lu",
         _k, _data.rows());
-    M_USER_ERROR(err_msg);
+    h2o4gpu_error(err_msg);
   }
 
   if (seed_ < 0) {
@@ -433,5 +435,5 @@ INSTANTIATE(int)
 #undef INSTANTIATE
 }
 
-}  // namespace Kmeans
-}  // namespace H2O4GPU
+}  // namespace kMeans
+}  // namespace h2o4gpu
diff --git a/src/gpu/kmeans/kmeans_init.cuh b/src/gpu/kmeans/kmeans_init.cuh
@@ -9,17 +9,18 @@
 
 #include <memory>
 
-#include "KmMatrix/KmConfig.h"
-#include "KmMatrix/KmMatrix.hpp"
-#include "KmMatrix/utils.cuh"
-#include "KmMatrix/Generator.hpp"
-#include "KmMatrix/Generator.cuh"
-#include "KmMatrix/GpuInfo.cuh"
+#include "../matrix/KmMatrix/KmMatrix.hpp"
+#include "../matrix/KmMatrix/Generator.hpp"
+#include "../matrix/KmMatrix/Generator.cuh"
+#include "../utils/GpuInfo.cuh"
+#include "../utils/utils.cuh"
 
 constexpr double ESP = 1e-8;
 
-namespace H2O4GPU {
-namespace KMeans {
+namespace h2o4gpu {
+namespace kMeans {
+
+using namespace Matrix;
 
 namespace detail {
 
@@ -202,7 +203,7 @@ struct KmeansLlInit : public KmeansInitBase<T> {
 
 // FIXME: Make kmeans++ a derived class of KmeansInitBase
 
-}  // namespace Kmeans
-}  // namespace H2O4GPU
+}  // namespace kMeans
+}  // namespace h2o4gpu
 
 #endif  // KMEANS_INIT_H_