SINGA-100 Implement layers using CUDNN for GPU training

Add GPU functions: gpu_scale(), gpu_asum(), gpu_sample_gaussian(), gpu_sample_uniform().
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/2ed18a54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/2ed18a54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/2ed18a54

Branch: refs/heads/master
Commit: 2ed18a547243b0ff160405f969d243cca31672bc
Parents: 5d35ef2
Author: seaok <[email protected]>
Authored: Fri Nov 27 18:51:05 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Fri Dec 11 11:47:57 2015 +0800

----------------------------------------------------------------------
 include/singa/utils/math_addr.h | 36 ++++++++++++++++++++++++++++++------
 include/singa/utils/math_blob.h | 10 ++++++----
 2 files changed, 36 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2ed18a54/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index f63ff78..f548606 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -29,6 +29,7 @@ extern "C" {
 #endif
 #include "singa/utils/singa_op.h"
 #ifdef USE_GPU
+#include "cuda_utils.h"
 #include <cublas_v2.h>
 #endif
 
@@ -170,6 +171,16 @@ void cpu_sample_gaussian(URNG& g, int n, Dtype mean, Dtype std, Dtype* A) {
 
 #ifdef USE_GPU
 template<typename Dtype>
+Dtype gpu_asum(int n, const Dtype* A, int inc) {
+  Dtype result = 0.0;
+  cublasHandle_t handle;
+  cublasCreate(&handle);
+  cublasSasum(handle, n, A, inc, &result);
+  cublasDestroy(handle);
+  return result;
+}
+
+template<typename Dtype>
 void gpu_gemm(const Dtype * A, const Dtype * B, const int m, const int n,
     const int k, const Dtype alpha, const Dtype beta, const bool TranA,
     const bool TranB, Dtype * C) {
@@ -205,6 +216,14 @@ void gpu_axpy(const Dtype * A, const int n, const Dtype alpha, Dtype * B) {
 }
 
 template<typename Dtype>
+void gpu_scale(const int n, const Dtype alpha, Dtype * A) {
+  cublasHandle_t handle;
+  cublasCreate(&handle);
+  cublasSscal(handle, n, &alpha, A, 1);
+  cublasDestroy(handle);
+}
+
+template<typename Dtype>
 Dtype gpu_dot(const Dtype * A, const Dtype * B, const int n) {
   cublasHandle_t handle;
   cublasCreate(&handle);
@@ -259,14 +278,19 @@ void gpu_expand_f(const Dtype * A, const int m, const int n, Dtype * B) {
 }
 
-template<typename Dtype>
-void gpu_sample_uniform(int n, Dtype low, Dtype high, Dtype* A) {
-
+template<typename Dtype, typename URNG>
+void gpu_sample_uniform(URNG g, int n, Dtype low, Dtype high, Dtype* A) {
+  //curandGenerator_t gen;
+  //curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
+  curandSetPseudoRandomGeneratorSeed(g, time(NULL));
+  curandGenerateUniform(g, A, n);
+  //curandDestroyGenerator(gen);
 }
 
-template<typename Dtype>
-void gpu_sample_gaussian(int n, Dtype mean, Dtype std, Dtype* A) {
-
+template<typename Dtype, typename URNG>
+void gpu_sample_gaussian(URNG g, int n, Dtype mean, Dtype std, Dtype* A) {
+  curandSetPseudoRandomGeneratorSeed(g, time(NULL));
+  curandGenerateNormal(g, A, n, mean, std);
 }
 
 // expand each element in A into a row of B
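
The gpu_asum and gpu_scale wrappers above are templated on Dtype yet always dispatch to the float-only cuBLAS routines (cublasSasum, cublasSscal), and each call creates and destroys its own cublasHandle_t. Below is a minimal sketch of a precision-aware alternative that reuses one handle; it is an illustration only, not part of this commit, and the helper default_cublas_handle() is a hypothetical name.

// Sketch only (not SINGA code): plain overloads let the compiler pick the
// right cuBLAS precision, and a lazily created static handle avoids paying
// for cublasCreate/cublasDestroy on every call.
#include <cublas_v2.h>

inline cublasHandle_t default_cublas_handle() {  // hypothetical helper
  static cublasHandle_t handle = [] {
    cublasHandle_t h;
    cublasCreate(&h);
    return h;
  }();
  return handle;
}

inline float gpu_asum(int n, const float* A, int inc) {
  float result = 0.0f;
  cublasSasum(default_cublas_handle(), n, A, inc, &result);  // sum of |A[i]|
  return result;
}

inline double gpu_asum(int n, const double* A, int inc) {
  double result = 0.0;
  cublasDasum(default_cublas_handle(), n, A, inc, &result);
  return result;
}

inline void gpu_scale(int n, float alpha, float* A) {
  cublasSscal(default_cublas_handle(), n, &alpha, A, 1);  // A[i] *= alpha
}

inline void gpu_scale(int n, double alpha, double* A) {
  cublasDscal(default_cublas_handle(), n, &alpha, A, 1);
}

Note that asum is a sum of absolute values, which is why the Asum() wrapper in math_blob.h (next diff) divides by A.count() to report a mean absolute value.
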
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2ed18a54/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index ce40d4f..bbf7cc0 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -50,7 +50,7 @@ void Scale(Dtype alpha, Blob<Dtype> * B) {
   else {
 #ifdef USE_GPU
     // TODO(haibo) check it.
-//    gpu_scale(B->count(), alpha, B->mutable_gpu_data());
+    gpu_scale(B->count(), alpha, B->mutable_gpu_data());
 #endif
   }
 }
@@ -644,7 +644,7 @@ Dtype Asum(const Blob<Dtype>& A) {
     return cpu_asum(A.count(), A.cpu_data(), 1) / A.count();
   } else {
 #ifdef USE_GPU
-    return 0;  // TODO(haibo)
+    return gpu_asum(A.count(), A.cpu_data(), 1) / A.count();  // TODO(haibo)
 #endif
   }
 }
@@ -662,7 +662,8 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
   } else {
 #ifdef USE_GPU
     // TODO(haibo) check
-    gpu_sample_uniform(A->count(), low, high, A->mutable_gpu_data());
+    gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high,
+                       A->mutable_gpu_data());
 #endif
   }
 }
@@ -678,7 +679,8 @@ void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) {
   } else {
 #ifdef USE_GPU
     // TODO(haibo) check it.
-    gpu_sample_gaussian(A->count(), mean, std, A->mutable_gpu_data());
+    gpu_sample_gaussian(context->curand_generator(thread), A->count(), mean, std,
+                        A->mutable_gpu_data());
 #endif
   }
 }
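
The Blob-level samplers now take context->curand_generator(thread), a per-thread cuRAND generator that presumably comes from SINGA's Context utility (not part of this diff). Two TODO-flagged details above are worth noting: gpu_sample_uniform never uses its low/high arguments, since curandGenerateUniform always fills (0.0, 1.0], and the GPU branch of Asum passes A.cpu_data() where a device pointer (A.gpu_data()) is presumably intended, given that cuBLAS expects device memory. The standalone sketch below, an assumption-laden illustration rather than SINGA code, shows the full cuRAND pattern for float data, including a hypothetical map_range kernel that rescales the raw uniform output.

// Sketch only: demonstrates the cuRAND calls used above end to end.
#include <cuda_runtime.h>
#include <curand.h>

// Hypothetical kernel: stretch (0, 1] into (low, high].
__global__ void map_range(float* A, int n, float low, float high) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) A[i] = low + (high - low) * A[i];
}

int main() {
  const int n = 1024;  // even: curandGenerateNormal requires an even count
  float* A;
  cudaMalloc(&A, n * sizeof(float));

  curandGenerator_t gen;
  curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
  curandSetPseudoRandomGeneratorSeed(gen, 1234ULL);  // seed once, up front

  // Uniform: generate (0, 1], then rescale to (low, high] on the device.
  curandGenerateUniform(gen, A, n);
  map_range<<<(n + 255) / 256, 256>>>(A, n, -1.0f, 1.0f);

  // Gaussian with the given mean and std, as gpu_sample_gaussian does.
  curandGenerateNormal(gen, A, n, 0.0f, 1.0f);

  cudaDeviceSynchronize();
  curandDestroyGenerator(gen);
  cudaFree(A);
  return 0;
}

Seeding with time(NULL) on every call, as the committed samplers do, makes calls that land within the same second reuse one seed; seeding once at generator creation, as sketched here, avoids that.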
