SINGA-104 Add Context Class

Update the Context class:
1. Rename functions and variables.
2. Add random generators for CPU threads.

TODO: run the test in test_context.cu; add implicit/automatic init (using device 0).


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9aff30aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9aff30aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9aff30aa

Branch: refs/heads/master
Commit: 9aff30aab69f81e45d3986c0337346e0e9170936
Parents: 35de4f9
Author: Wei Wang <[email protected]>
Authored: Thu Nov 26 11:53:10 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Thu Nov 26 11:56:37 2015 +0800

----------------------------------------------------------------------
 Makefile.gpu                  |  10 +--
 include/singa/utils/context.h | 140 +++++++++++++++++++++++++++----------
 src/test/test_context.cc      |  66 -----------------
 src/test/test_context.cu      |  55 +++++++++++++++
 src/utils/context.cc          |  82 +++++++++++-----------
 5 files changed, 203 insertions(+), 150 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 2fea3b2..35b81b9 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -20,16 +20,16 @@
 
 ###################User Config Varaibles #############################
 # third-party library installation folder
-HOME_DIR := /usr
+HOME_DIR := /home/wangwei/local
 
 CUDA_DIR := /usr/local/cuda
 
 #CUDA_DIR :=
 
 # Lib folder for system and external libs. You may need to change it.
-LIBRARY_DIRS := $(HOME_DIR)/lib64 $(HOME_DIR)/lib $(HOME_DIR)/local/lib 
$(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
+LIBRARY_DIRS := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib $(HOME_DIR)/lib64 
$(HOME_DIR)/lib
 # Header folder for system and external libs. You may need to change it.
-INCLUDE_DIRS := $(HOME_DIR)/include ./include 
$(HOME_DIR)/local/include/zookeeper $(CUDA_DIR)/include
+INCLUDE_DIRS := $(CUDA_DIR)/include $(HOME_DIR)/include ./include
 # g++ location, should support c++11, tested with 4.8.1
 CXX := g++
 CUCXX := nvcc
@@ -50,7 +50,7 @@ ZK_FLAGS :=-DTHREADED -fpermissive
 CXXFLAGS := -O2 -msse3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
        $(MSHADOW_FLAGS) -DCPU_ONLY=1 $(ZK_FLAGS)\
        -funroll-loops $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
-CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
+CUCXXFLAGS := $(MSHADOW_FLAGS) -DUSE_GPU -std=c++11 -G $(CUDA_ARCH) \
        $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
 
 #Add device compile option
@@ -84,7 +84,7 @@ TEST_CUDA_SRCS :=$(shell find src/test/ -maxdepth 1 -name 
"*.cu")
 TEST_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(TEST_CUDA_SRCS:.cu=.o)))
 -include $(TEST_CUDA_OBJS:%.o=%.P)
 
-SINGA_CUDA_SRCS :=$(shell find src/ -maxdepth 2 -name "*.cu")
+SINGA_CUDA_SRCS := $(shell find src/ \( -path "src/test" \) -prune -o \( -name 
"*.cu" -type f \) -print )
 SINGA_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, 
$(SINGA_CUDA_SRCS:.cu=.o)))
 -include $(SINGA_CUDA_OBJS:%.o=%.P)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 762ae75..7a41dac 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -23,63 +23,129 @@
 #define SINGA_UTILS_CONTEXT_H_
 
 #include <vector>
+#include <random>
+#include <chrono>
+#include <thread>
+#include <unordered_map>
+#include <glog/logging.h>
+
 
 #ifdef USE_GPU
-#include <cublas_v2.h>
-#include <cuda_runtime.h>
-#include <curand.h>
+#include "singa/utils/cuda_utils.h"
 #endif
 
 
 namespace singa {
 
-const int kDefaultDevice = 20;
+// max num of threads per process
+const int kNumMaxThreads = 1024;
 
+/**
+ * Context is used as a global singleton, which stores the mapping from CPU
+ * thread id to GPU device id. It manages the handlers for GPU
+ * devices. It also manages the GPU and CPU random generators, which are 
created
+ * when accessed. One CPU thread has a CPU random generator. A GPU device
+ * has a GPU random generator.
+ */
 class Context {
-  public:
-
+ public:
+   /**
+    * Destructor, release random generators and handlers.
+    */
        ~Context();
-
-       void Setup();
-
-#ifdef USE_GPU
-       int DeviceID(const int index) {
-         return device_ids_[index];
-       }
-
-       void SetDeviceID(const int index, const int id) {
-         device_ids_[index] = id;
-       }
-
-       void SetDevice(const int index) {
-         cudaSetDevice(device_ids_[index]);
+  /**
+   * Constructor, init arrays for random generators and handlers.
+   */
+  Context();
+
+  /**
+   * @return the ID of the device attached to a given CPU thread:
+   * if the device is a GPU card, then returns the GPU device ID;
+   * Else return -1.
+   */
+       int device_id(const std::thread::id tid) {
+    CHECK(device_id_.find(tid) != device_id_.end());
+         return device_id_[tid];
        }
 
-       cublasHandle_t Handle(const int index) {
-         return handles_[index];
+  /**
+   * Setup the CPU thread, which may be assigned a GPU device.
+   * Set the random seed to -1.
+   * A GPU handler will be created for the GPU device.
+   * @param[in] thread::id CPU thread ID
+   * @param[in] device_id GPU device ID
+   */
+       void SetupDevice(const std::thread::id tid, const int did);
+
+  /**
+   * @copy SetupDevice(const std::thread::id, const int);
+   * @param[in] seed random seed
+   */
+  void SetupDevice(const std::thread::id tid, const int did, long long seed);
+
+  /**
+   * Get the CPU random generator.
+   * If the generator does not exist, then create it now.
+   * If the seed is not set, i.e., seed=-1, then get a seed from system time.
+   * @param[in] thread::id CPU thread ID
+   * @return the CPU random generator
+   */
+  std::mt19937* rand_generator(const std::thread::id tid) {
+    if (rand_generator_.find(tid) == rand_generator_.end()) {
+      CHECK(seed_.find(tid) != seed_.end());
+      auto seed = static_cast<unsigned>(seed_[tid]);
+      if (seed_[tid] == -1)
+        seed = std::chrono::system_clock::now().time_since_epoch().count();
+      rand_generator_[tid] = new std::mt19937(seed);
+    }
+    return rand_generator_[tid];
+  }
+#ifdef USE_GPU
+  /**
+   * Get the handler of the GPU device attached to a CPU thread.
+   * @param[in] thread::id
+   * @return the GPU handler; CHECK fails if no handler exists for this thread.
+   */
+       cublasHandle_t cublas_handle(const std::thread::id tid) {
+    CHECK(cublas_handle_.find(tid) != cublas_handle_.end());
+         return cublas_handle_[tid];
        }
-
-       void CreateHandle(const int index);
-
-       void DestoryHandle(const int index);
-
-       curandGenerator_t GpuRandGenerator(const int index) {
-         return gpu_rand_generators_[index];
+  /**
+   * Get the random generator of the GPU device assigned to the given thread.
+   * @param[in] thread::id
+   * @return random generator. If it does not exist, then create one
+   * of type CURAND_RNG_PSEUDO_DEFAULT (a user-set seed is not applied yet).
+   */
+       curandGenerator_t curand_generator(const std::thread::id tid) {
+    if (curand_generator_.find(tid) == curand_generator_.end()) {
+      CHECK(seed_.find(tid) != seed_.end());
+      auto seed = seed_[tid];
+      // TODO handle user set seed
+      cudaSetDevice(device_id_[tid]);
+      curandCreateGenerator(&curand_generator_[tid], 
CURAND_RNG_PSEUDO_DEFAULT);
+    }
+         return curand_generator_[tid];
        }
 
-       void CreateGpuRandGenerator(const int index);
-
-       void DestoryGpuRandGenerator(const int index);
+  /*
+ protected:
+       void CreateHandle(const int thread::id);
+       void DestoryHandle(const int thread::id);
+       void CreateGpuRandGenerator(const int thread::id);
+       void DestoryGpuRandGenerator(const int thread::id);
+  */
 
 #endif
 
-  protected:
-       std::vector<int> device_ids_;
+ protected:
+
+       std::unordered_map<std::thread::id, int> device_id_;
+  std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
+  std::unordered_map<std::thread::id, int> seed_;
 #ifdef USE_GPU
-       std::vector<cublasHandle_t> handles_;
-       std::vector<curandGenerator_t> gpu_rand_generators_;
+       std::unordered_map<std::thread::id, cublasHandle_t> cublas_handle_;
+       std::unordered_map<std::thread::id, curandGenerator_t> 
curand_generator_;
 #endif
-
 };
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
deleted file mode 100644
index 3a23b23..0000000
--- a/src/test/test_context.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "gtest/gtest.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-
-//#include <cuda_runtime.h>
-//#include "cublas_v2.h"
-
-using namespace singa;
-using namespace std;
-
-TEST(ContextTest, TestDevice) {
-  auto context = Singleton<Context>::Instance();
-  context->Setup();
-
-  int index = 4;
-  int device_id = context->DeviceID(index);
-  ASSERT_EQ(4,device_id);
-
-  context->SetDeviceID(index,6);
-  device_id = context->DeviceID(index);
-  ASSERT_EQ(6,device_id);
-}
-
-TEST(ContextTest, TestHandle) {
-  auto context = Singleton<Context>::Instance();
-  context->Setup();
-
-  int index = 2;
-  context->CreateHandle(index);
-
-  float cpu_ret = 0.0f;
-  float gpu_ret = 0.0f;
-
-  float A[12];
-  float B[12];
-  
-  for(int i = 0; i < 12; i++) {
-       A[i]=i-1;
-       B[i]=i+1;
-  }
-
-  float* A_gpu = NULL;
-  float* B_gpu = NULL;
-  
-  cudaMalloc((void**)&A_gpu, 12*sizeof(float));
-  cudaMalloc((void**)&B_gpu, 12*sizeof(float));
-
-  cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
-  cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
-
-  cublasHandle_t handle = context->Handle(index);
-  /*cublasHandle_t handle;
-  cudaSetDevice(0);
-  cublasCreate(&handle);*/
-
-  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
-
-  for(int i = 0; i < 12;++i) {
-       cpu_ret += A[i] * B[i];
-  }
-  
-  ASSERT_EQ(gpu_ret,cpu_ret);
-  
-  cudaFree(A_gpu);
-  cudaFree(B_gpu);
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/test/test_context.cu
----------------------------------------------------------------------
diff --git a/src/test/test_context.cu b/src/test/test_context.cu
new file mode 100644
index 0000000..88ab06b
--- /dev/null
+++ b/src/test/test_context.cu
@@ -0,0 +1,55 @@
+#include <thread>
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+#include "singa/utils/cuda_utils.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+  auto context = Singleton<Context>::Instance();
+
+  auto id = std::this_thread::get_id();
+  context->SetupDevice(id, 0);
+  auto device_id = context->device_id(id);
+  ASSERT_EQ(1,device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+  auto context = Singleton<Context>::Instance();
+
+  float cpu_ret = 0.0f;
+  float gpu_ret = 0.0f;
+
+  float A[12];
+  float B[12];
+
+  for(int i = 0; i < 12; i++) {
+    A[i]=i-1;
+    B[i]=i+1;
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  context->SetupDevice(std::this_thread::get_id(), 0);
+
+  cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
+  cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+
+  cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
+
+  cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
+
+  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+  for(int i = 0; i < 12;++i) {
+    cpu_ret += A[i] * B[i];
+  }
+
+  ASSERT_EQ(gpu_ret,cpu_ret);
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
index 671bec0..37c8f39 100644
--- a/src/utils/context.cc
+++ b/src/utils/context.cc
@@ -23,66 +23,64 @@
 #include "singa/utils/singleton.h"
 
 namespace singa {
-  
+
 Context::~Context() {
 #ifdef USE_GPU
-  for(int i = 0; i < kDefaultDevice; ++i) {
-       SetDevice(i);
-
-       if(handles_[i] != NULL) {
-         cublasDestroy(handles_[i]);
-       }
-
-       if(gpu_rand_generators_[i] != NULL) {
-      curandDestroyGenerator(gpu_rand_generators_[i]);
-       }
+  for (auto& entry : device_id_) {
+    if (entry.second != -1) {
+      cudaSetDevice(entry.second);
+      if (cublas_handle_[entry.first] != nullptr) {
+        cublasDestroy(cublas_handle_[entry.first]);
+        cublas_handle_[entry.first] = nullptr;
+      }
+      if(curand_generator_[entry.first] != nullptr) {
+        curandDestroyGenerator(curand_generator_[entry.first]);
+        curand_generator_[entry.first] = nullptr;
+      }
+    }
   }
 #endif
+  for (auto& entry : rand_generator_) {
+    if (entry.second != nullptr) {
+      delete entry.second;
+      entry.second = nullptr;
+    }
+  }
 }
 
-void Context::Setup() {
+Context::Context() { }
 
-  for(int i = 0; i < kDefaultDevice; ++i) {
-       //init device index
-       device_ids_.push_back(i);
-  }
+void Context::SetupDevice(const std::thread::id thread, const int did) {
+  SetupDevice(thread, did, -1);
+}
 
+void Context::SetupDevice(const std::thread::id thread, const int did,
+    long long seed) {
+  device_id_[thread] = did;
 #ifdef USE_GPU
-  for(int i = 0; i < kDefaultDevice; ++i) {
-       //init handle
-       cublasHandle_t handle = NULL;
-       handles_.push_back(handle);
-
-       curandGenerator_t gpu_rand_generator = NULL;
-       gpu_rand_generators_.push_back(gpu_rand_generator);
+  if (did > -1) {
+    cudaSetDevice(did);
+    cublasCreate(&handle_[thread]);
   }
 #endif
+  seed_[thread] = seed;
 }
 
+/*
 #ifdef USE_GPU
-void Context::CreateHandle(const int index) {
-  SetDevice(device_ids_[index]);
-  cublasCreate(&handles_[index]);
-}
-
-void Context::DestoryHandle(const int index) {
-  SetDevice(device_ids_[index]);
-  cublasDestroy(handles_[index]);
-  handles_[index] = NULL;
+void Context::DestoryHandle(const int thread::id) {
+  cudaSetDevice(device_id_[thread::id]);
+  cublasDestroy(handle_[thread::id]);
+  handle_[thread::id] = nullptr;
 }
 
-void Context::CreateGpuRandGenerator(const int index) {
-  SetDevice(device_ids_[index]);
-  curandCreateGenerator(&gpu_rand_generators_[index], 
CURAND_RNG_PSEUDO_DEFAULT);
+void Context::DestoryGpuRandGenerator(const int thread::id) {
+  cudaSetDevice(device_id_[thread::id]);
+  curandDestroyGenerator(curand_generator_[thread::id]);
+  curand_generator_[thread::id] = nullptr;
 }
-
-void Context::DestoryGpuRandGenerator(const int index) {
-  SetDevice(device_ids_[index]);
-  curandDestroyGenerator(gpu_rand_generators_[index]);
-  gpu_rand_generators_[index] = NULL;
-}
-
 #endif
+*/
 
 
 }  // namespace singa

Reply via email to