SINGA-100 Implement layers using CUDNN for GPU training

Add GPU functions: gpu_scale(), gpu_asum(), gpu_sample_gaussian(), gpu_sample_uniform().
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/2ed18a54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/2ed18a54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/2ed18a54

Branch: refs/heads/master
Commit: 2ed18a547243b0ff160405f969d243cca31672bc
Parents: 5d35ef2
Author: seaok <[email protected]>
Authored: Fri Nov 27 18:51:05 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Fri Dec 11 11:47:57 2015 +0800

----------------------------------------------------------------------
 include/singa/utils/math_addr.h | 36 ++++++++++++++++++++++++++++++------
 include/singa/utils/math_blob.h | 10 ++++++----
 2 files changed, 36 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2ed18a54/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index f63ff78..f548606 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -29,6 +29,7 @@ extern "C" {
 #endif
 #include "singa/utils/singa_op.h"
 #ifdef USE_GPU
+#include "cuda_utils.h"
 #include <cublas_v2.h>
 #endif
 
@@ -170,6 +171,16 @@ void cpu_sample_gaussian(URNG& g, int n, Dtype mean, Dtype std, Dtype* A) {
 
 #ifdef USE_GPU
 template<typename Dtype>
+Dtype gpu_asum(int n, const Dtype* A, int inc) {
+  Dtype result = 0.0;
+  cublasHandle_t handle;
+  cublasCreate(&handle);
+  cublasSasum(handle, n, A, inc, &result);
+  cublasDestroy(handle);
+  return result;
+}
+
+template<typename Dtype>
 void gpu_gemm(const Dtype * A, const Dtype * B, const int m, const int n,
     const int k, const Dtype alpha, const Dtype beta, const bool TranA,
     const bool TranB, Dtype * C) {
@@ -205,6 +216,14 @@ void gpu_axpy(const Dtype * A, const int n, const Dtype alpha, Dtype * B) {
 }
 
 template<typename Dtype>
+void gpu_scale(const int n, const Dtype alpha, Dtype * A) {
+  cublasHandle_t handle;
+  cublasCreate(&handle);
+  cublasSscal(handle, n, &alpha, A, 1);
+  cublasDestroy(handle);
+}
+
+template<typename Dtype>
 Dtype gpu_dot(const Dtype * A, const Dtype * B, const int n) {
   cublasHandle_t handle;
   cublasCreate(&handle);
@@ -259,14 +278,19 @@ void gpu_expand_f(const Dtype * A, const int m, const int n, Dtype * B) {
 }
 
-template<typename Dtype>
-void gpu_sample_uniform(int n, Dtype low, Dtype high, Dtype* A) {
-
+template<typename Dtype, typename URNG>
+void gpu_sample_uniform(URNG g, int n, Dtype low, Dtype high, Dtype* A) {
+  //curandGenerator_t gen;
+  //curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
+  curandSetPseudoRandomGeneratorSeed(g, time(NULL));
+  curandGenerateUniform(g, A, n);
+  //curandDestroyGenerator(gen);
 }
 
-template<typename Dtype>
-void gpu_sample_gaussian(int n, Dtype mean, Dtype std, Dtype* A) {
-
+template<typename Dtype, typename URNG>
+void gpu_sample_gaussian(URNG g, int n, Dtype mean, Dtype std, Dtype* A) {
+  curandSetPseudoRandomGeneratorSeed(g, time(NULL));
+  curandGenerateNormal(g, A, n, mean, std);
 }
 
 // expand each element in A into a row of B
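
The gpu_asum and gpu_scale wrappers above are templated on Dtype yet always dispatch to the float-only cuBLAS routines (cublasSasum, cublasSscal), and each call creates and destroys its own cublasHandle_t. Below is a minimal sketch of a precision-aware alternative that reuses one handle; it is an illustration only, not part of this commit, and the helper default_cublas_handle() is a hypothetical name.

// Sketch only (not SINGA code): plain overloads let the compiler pick the
// right cuBLAS precision, and a lazily created static handle avoids paying
// for cublasCreate/cublasDestroy on every call.
#include <cublas_v2.h>

inline cublasHandle_t default_cublas_handle() {  // hypothetical helper
  static cublasHandle_t handle = [] {
    cublasHandle_t h;
    cublasCreate(&h);
    return h;
  }();
  return handle;
}

inline float gpu_asum(int n, const float* A, int inc) {
  float result = 0.0f;
  cublasSasum(default_cublas_handle(), n, A, inc, &result);  // sum of |A[i]|
  return result;
}

inline double gpu_asum(int n, const double* A, int inc) {
  double result = 0.0;
  cublasDasum(default_cublas_handle(), n, A, inc, &result);
  return result;
}

inline void gpu_scale(int n, float alpha, float* A) {
  cublasSscal(default_cublas_handle(), n, &alpha, A, 1);  // A[i] *= alpha
}

inline void gpu_scale(int n, double alpha, double* A) {
  cublasDscal(default_cublas_handle(), n, &alpha, A, 1);
}

Note that asum is a sum of absolute values, which is why the Asum() wrapper in math_blob.h (next diff) divides by A.count() to report a mean absolute value.
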
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2ed18a54/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index ce40d4f..bbf7cc0 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -50,7 +50,7 @@ void Scale(Dtype alpha, Blob<Dtype> * B) {
   else {
 #ifdef USE_GPU
     // TODO(haibo) check it.
-//    gpu_scale(B->count(), alpha, B->mutable_gpu_data());
+    gpu_scale(B->count(), alpha, B->mutable_gpu_data());
 #endif
   }
 }
@@ -644,7 +644,7 @@ Dtype Asum(const Blob<Dtype>& A) {
     return cpu_asum(A.count(), A.cpu_data(), 1) / A.count();
   } else {
 #ifdef USE_GPU
-    return 0;  // TODO(haibo)
+    return gpu_asum(A.count(), A.cpu_data(), 1) / A.count();  // TODO(haibo)
 #endif
   }
 }
@@ -662,7 +662,8 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
   } else {
 #ifdef USE_GPU
     // TODO(haibo) check
-    gpu_sample_uniform(A->count(), low, high, A->mutable_gpu_data());
+    gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high,
+                       A->mutable_gpu_data());
 #endif
   }
 }
@@ -678,7 +679,8 @@ void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) {
   } else {
 #ifdef USE_GPU
     // TODO(haibo) check it.
-    gpu_sample_gaussian(A->count(), mean, std, A->mutable_gpu_data());
+    gpu_sample_gaussian(context->curand_generator(thread), A->count(), mean, std,
+                        A->mutable_gpu_data());
 #endif
   }
 }
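
The Blob-level samplers now take context->curand_generator(thread), a per-thread cuRAND generator that presumably comes from SINGA's Context utility (not part of this diff). Two TODO-flagged details above are worth noting: gpu_sample_uniform never uses its low/high arguments, since curandGenerateUniform always fills (0.0, 1.0], and the GPU branch of Asum passes A.cpu_data() where a device pointer (A.gpu_data()) is presumably intended, given that cuBLAS expects device memory. The standalone sketch below, an assumption-laden illustration rather than SINGA code, shows the full cuRAND pattern for float data, including a hypothetical map_range kernel that rescales the raw uniform output.

// Sketch only: demonstrates the cuRAND calls used above end to end.
#include <cuda_runtime.h>
#include <curand.h>

// Hypothetical kernel: stretch (0, 1] into (low, high].
__global__ void map_range(float* A, int n, float low, float high) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) A[i] = low + (high - low) * A[i];
}

int main() {
  const int n = 1024;  // even: curandGenerateNormal requires an even count
  float* A;
  cudaMalloc(&A, n * sizeof(float));

  curandGenerator_t gen;
  curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
  curandSetPseudoRandomGeneratorSeed(gen, 1234ULL);  // seed once, up front

  // Uniform: generate (0, 1], then rescale to (low, high] on the device.
  curandGenerateUniform(gen, A, n);
  map_range<<<(n + 255) / 256, 256>>>(A, n, -1.0f, 1.0f);

  // Gaussian with the given mean and std, as gpu_sample_gaussian does.
  curandGenerateNormal(gen, A, n, 0.0f, 1.0f);

  cudaDeviceSynchronize();
  curandDestroyGenerator(gen);
  cudaFree(A);
  return 0;
}

Seeding with time(NULL) on every call, as the committed samplers do, makes calls that land within the same second reuse one seed; seeding once at generator creation, as sketched here, avoids that.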
