Made KernelResource into a class for subclassing CUDA/OpenCL; fixed logical error in beagle.cpp
beagle-dev · Sep 7, 2009 · 22981db · 22981db
1 parent 374d871
commit 22981db
Show file tree

Hide file tree

Showing 7 changed files with 90 additions and 80 deletions.
diff --git a/libhmsbeagle/GPU/GPUImplDefs.h b/libhmsbeagle/GPU/GPUImplDefs.h
@@ -244,20 +244,6 @@
                                                 } \
                                             }
 
-typedef struct {
-    int paddedStateCount;
-    int categoryCount;
-    int patternCount;
-    char* kernelCode;
-    int patternBlockSize;
-    int matrixBlockSize;
-    int blockPeelingSize;
-    int isPowerOfTwo;
-    int smallestPowerOfTwo;
-    int slowReweighing;
-    int multiplyBlockSize;
-} KernelResource;
-
 typedef struct Dim3Int Dim3Int;
 
 struct Dim3Int

diff --git a/libhmsbeagle/GPU/GPUInterface.h b/libhmsbeagle/GPU/GPUInterface.h
@@ -31,6 +31,7 @@
 #endif
 
 #include "libhmsbeagle/GPU/GPUImplDefs.h"
+#include "libhmsbeagle/GPU/KernelResource.h"
 
 #ifdef CUDA
     #include <cuda.h>
@@ -126,7 +127,7 @@ class GPUInterface {
 
     void DestroyKernelMap();
 
-    KernelResource* kernel;
+    KernelResource* kernelResource;
 
 protected:
 	void InitializeKernelMap();

diff --git a/libhmsbeagle/GPU/GPUInterfaceCUDA.cpp b/libhmsbeagle/GPU/GPUInterfaceCUDA.cpp
@@ -38,9 +38,10 @@
 #include "libhmsbeagle/GPU/GPUImplDefs.h"
 #include "libhmsbeagle/GPU/GPUImplHelper.h"
 #include "libhmsbeagle/GPU/GPUInterface.h"
+#include "libhmsbeagle/GPU/KernelResource.h"
 
 
-std::map<int, KernelResource*> kernelMap;
+std::map<int, KernelResource> kernelMap;
 
 
 #define SAFE_CUDA(call) { \
@@ -66,6 +67,7 @@ GPUInterface::GPUInterface() {
     cudaDevice = NULL;
     cudaContext = NULL;
     cudaModule = NULL;
+    kernelResource = NULL;
 
 #ifdef BEAGLE_DEBUG_FLOW
     fprintf(stderr,"\t\t\tLeaving  GPUInterface::GPUInterface\n");
@@ -82,6 +84,10 @@ GPUInterface::~GPUInterface() {
         SAFE_CUDA(cuCtxDetach(cudaContext));
     }
 
+    if (kernelResource != NULL) {
+        delete kernelResource;
+    }
+
 #ifdef BEAGLE_DEBUG_FLOW
     fprintf(stderr,"\t\t\tLeaving  GPUInterface::~GPUInterface\n");
 #endif    
@@ -129,55 +135,47 @@ int GPUInterface::GetDeviceCount() {
 }
 
 void GPUInterface::DestroyKernelMap() {
-
-    std::map<int, KernelResource*>::const_iterator itr;
-    for(itr = kernelMap.begin(); itr != kernelMap.end(); ++itr) {
-        KernelResource* rsrc = itr->second;
-#ifdef BEAGLE_DEBUG_VALUES
-        fprintf(stderr,"Key: %d %d\n",(*itr).first, rsrc->paddedStateCount);
-#endif BEAGLE_DEBUG_VALUES
-        delete rsrc;
-//        delete (*itr).second;
-    }
-    //kernelMap.clear();
+// No longer necessary as std::map automatically clears itself
 }
 
 void GPUInterface::InitializeKernelMap() {
 
 #ifdef BEAGLE_DEBUG_FLOW
-	fprintf(stderr,"\t\t\tLoading kernel information for CUDA!\n");
+    fprintf(stderr,"\t\t\tLoading kernel information for CUDA!\n");
 #endif
-
-	KernelResource* kernel4 = new KernelResource;
-	kernel4->kernelCode = (char*) KERNELS_STRING_4; 
-	kernel4->paddedStateCount = 4;
-	kernel4->patternBlockSize = PATTERN_BLOCK_SIZE_4;
-	kernel4->matrixBlockSize = MATRIX_BLOCK_SIZE_4;
-	kernel4->blockPeelingSize = BLOCK_PEELING_SIZE_4;
-	kernel4->slowReweighing = SLOW_REWEIGHING_4;
-	kernel4->multiplyBlockSize = MULTIPLY_BLOCK_SIZE;
-	kernelMap.insert(std::make_pair(4,kernel4));
-
-	KernelResource* kernel48 = new KernelResource;
-	kernel48->kernelCode = (char*) KERNELS_STRING_48;
-	kernel48->paddedStateCount = 48;
-	kernel48->patternBlockSize = PATTERN_BLOCK_SIZE_48;
-	kernel48->matrixBlockSize = MATRIX_BLOCK_SIZE_48;
-	kernel48->blockPeelingSize = BLOCK_PEELING_SIZE_48;
-	kernel48->slowReweighing = SLOW_REWEIGHING_48;
-	kernel48->multiplyBlockSize = MULTIPLY_BLOCK_SIZE;
-	kernelMap.insert(std::make_pair(48,kernel48));
-
-	KernelResource* kernel64 = new KernelResource;
-	kernel64->kernelCode = (char*) KERNELS_STRING_64;
-	kernel64->paddedStateCount = 64;
-	kernel64->patternBlockSize = PATTERN_BLOCK_SIZE_64;
-	kernel64->matrixBlockSize = MATRIX_BLOCK_SIZE_64;
-	kernel64->blockPeelingSize = BLOCK_PEELING_SIZE_64;
-	kernel64->slowReweighing = SLOW_REWEIGHING_64;
-	kernel64->multiplyBlockSize = MULTIPLY_BLOCK_SIZE;
-	kernelMap.insert(std::make_pair(64,kernel64));
-
+
+    KernelResource kernel4 = KernelResource(
+        4,
+        (char*) KERNELS_STRING_4,
+        PATTERN_BLOCK_SIZE_4,
+        MATRIX_BLOCK_SIZE_4,
+        BLOCK_PEELING_SIZE_4,
+        SLOW_REWEIGHING_4,
+        MULTIPLY_BLOCK_SIZE,
+        0,0);
+    kernelMap.insert(std::make_pair(4,kernel4));
+
+    KernelResource kernel48 = KernelResource(
+        48,
+        (char*) KERNELS_STRING_48,
+        PATTERN_BLOCK_SIZE_48,
+        MATRIX_BLOCK_SIZE_48,
+        BLOCK_PEELING_SIZE_48,
+        SLOW_REWEIGHING_48,
+        MULTIPLY_BLOCK_SIZE,
+        0,0);
+    kernelMap.insert(std::make_pair(48,kernel48));
+
+    KernelResource kernel64 = KernelResource(
+        64,
+        (char*) KERNELS_STRING_64,
+        PATTERN_BLOCK_SIZE_64,
+        MATRIX_BLOCK_SIZE_64,
+        BLOCK_PEELING_SIZE_64,
+        SLOW_REWEIGHING_64,
+        MULTIPLY_BLOCK_SIZE,
+        0,0);
+    kernelMap.insert(std::make_pair(64,kernel64));
 }
 
 void GPUInterface::SetDevice(int deviceNumber, int paddedStateCount, int categoryCount, int paddedPatternCount) {
@@ -190,20 +188,27 @@ void GPUInterface::SetDevice(int deviceNumber, int paddedStateCount, int categor
     SAFE_CUDA(cuCtxCreate(&cudaContext, CU_CTX_SCHED_AUTO, cudaDevice));
 
     if (kernelMap.size() == 0) {
-    	// kernels have not yet been initialized; do so now.  Hopefully, this only occurs once per library load.
-    	InitializeKernelMap();    	
+        // kernels have not yet been initialized; do so now.  Hopefully, this only occurs once per library load.
+        InitializeKernelMap();
     }
 
     if (kernelMap.count(paddedStateCount) == 0) {
     	fprintf(stderr,"Critical error: unable to find kernel code for %d states.\n",paddedStateCount);
     	exit(-1);
     }
 
-    kernel = kernelMap[paddedStateCount];
-    kernel->categoryCount = categoryCount;
-    kernel->patternCount = paddedPatternCount;
+//    kernel.paddedStateCount = paddedStateCount;
+//    kernel.kernelCode = kernelMap[paddedStateCount].kernelCode;
+//    kernel.patternBlockSize = kernelMap[paddedStateCount].patternBlockSize;
+//    kernel.matrixBlockSize = kernelMap[paddedStateCount].matrixBlockSize;
+//    kernel.blockPeelingSize = kernelMap[paddedStateCount].blockPeelingSize;
+//    kernel.slowReweighing = kernelMap[paddedStateCount].slowReweighing;
+//    kernel.multiplyBlockSize = kernelMap[paddedStateCount].multiplyBlockSize;
+    kernelResource = kernelMap[paddedStateCount].copy();
+    kernelResource->categoryCount = categoryCount;
+    kernelResource->patternCount = paddedPatternCount;
 
-    SAFE_CUDA(cuModuleLoadData(&cudaModule, kernel->kernelCode));     	
+    SAFE_CUDA(cuModuleLoadData(&cudaModule, kernelResource->kernelCode));
 
     SAFE_CUDA(cuCtxPopCurrent(&cudaContext));
 

diff --git a/libhmsbeagle/GPU/KernelLauncher.cpp b/libhmsbeagle/GPU/KernelLauncher.cpp
@@ -49,13 +49,13 @@ KernelLauncher::~KernelLauncher() {
 
 void KernelLauncher::SetupKernelBlocksAndGrids() {
 
-    kPaddedStateCount = gpu->kernel->paddedStateCount;
-    kCategoryCount = gpu->kernel->categoryCount;
-    kPatternCount = gpu->kernel->patternCount;
-    kMultiplyBlockSize = gpu->kernel->multiplyBlockSize;
-    kPatternBlockSize = gpu->kernel->patternBlockSize;
-    kSlowReweighing = gpu->kernel->slowReweighing;
-    kMatrixBlockSize = gpu->kernel->matrixBlockSize;
+    kPaddedStateCount = gpu->kernelResource->paddedStateCount;
+    kCategoryCount = gpu->kernelResource->categoryCount;
+    kPatternCount = gpu->kernelResource->patternCount;
+    kMultiplyBlockSize = gpu->kernelResource->multiplyBlockSize;
+    kPatternBlockSize = gpu->kernelResource->patternBlockSize;
+    kSlowReweighing = gpu->kernelResource->slowReweighing;
+    kMatrixBlockSize = gpu->kernelResource->matrixBlockSize;
 
     // Set up block/grid for transition matrices computation
     bgTransitionProbabilitiesBlock = Dim3Int(kMultiplyBlockSize, kMultiplyBlockSize);

diff --git a/libhmsbeagle/GPU/Makefile.am b/libhmsbeagle/GPU/Makefile.am
@@ -6,7 +6,8 @@ if BUILDCUDA
 noinst_LTLIBRARIES= libcuda.la 
 libcuda_la_SOURCES =  \
 BeagleGPUImpl.cpp  BeagleGPUImpl.h  GPUImplDefs.h  GPUImplHelper.cpp \
-GPUImplHelper.h  GPUInterface.h  GPUInterfaceCUDA.cpp  KernelLauncher.cpp  KernelLauncher.h 
+GPUImplHelper.h  GPUInterface.h  GPUInterfaceCUDA.cpp  KernelLauncher.cpp \
+KernelLauncher.h KernelResource.cpp KernelResource.h
 
 libcuda_la_CFLAGS = $(CUDA_CFLAGS)
 libcuda_la_CXXFLAGS = $(CUDA_CFLAGS)
@@ -17,7 +18,8 @@ if BUILDOPENCL
 noinst_LTLIBRARIES= libopencl.la 
 libopencl_la_SOURCES =  \
 BeagleGPUImpl.cpp  BeagleGPUImpl.h  GPUImplDefs.h  GPUImplHelper.cpp \
-GPUImplHelper.h  GPUInterface.h  GPUInterfaceOpenCL.cpp  KernelLauncher.cpp  KernelLauncher.h 
+GPUImplHelper.h  GPUInterface.h  GPUInterfaceOpenCL.cpp  KernelLauncher.cpp \
+KernelLauncher.h KernelResource.cpp KernelResource.h
 
 nodist_libopencl_la_SOURCES = BeagleOpenCL_Kernels.h
 BUILT_SOURCES = BeagleOpenCL_Kernels.h

diff --git a/libhmsbeagle/beagle.cpp b/libhmsbeagle/beagle.cpp
@@ -95,11 +95,11 @@ void beagle_library_initialize(void) {
 void beagle_library_finalize(void) {
 
 	// Destory GPU kernel info
-#if defined(CUDA)
-	GPUInterface* gpu = new GPUInterface;
-	gpu->DestroyKernelMap();
-	delete gpu;
-#endif
+//#if defined(CUDA)
+//	GPUInterface* gpu = new GPUInterface;
+//	gpu->DestroyKernelMap();
+//	delete gpu;
+//#endif
 
 	// Destroy implFactory
 	if (implFactory && loaded) {	
@@ -141,7 +141,7 @@ void __attribute__ ((destructor)) beagle_gnu_finalize(void) {
 #endif
 
 int beagleFinalize() {
-    if (!loaded)
+    if (loaded)
         beagle_library_finalize();
     return BEAGLE_SUCCESS;
 }

diff --git a/suppress_cuda.valgrind b/suppress_cuda.valgrind
@@ -0,0 +1,16 @@
+# Suppression for CUDA
+{
+	<insert a suppression name>
+	Memcheck:Leak
+	...
+	obj:/usr/local/cuda/lib/libcuda.dylib
+	...
+}
+
+{
+	<insert a suppression name>
+	Memcheck:Cond
+	...
+	fun:cuGLUnregisterBufferObject
+	...
+}