SINGA-80 New Blob Level and Address Level Math Operation Interface

Unify the signatures of the CPU and GPU math functions. Fix bugs in
MVAddCol() and OuterProduct(). All tests pass.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8ade7d76
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8ade7d76
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8ade7d76

Branch: refs/heads/master
Commit: 8ade7d76dbe64b75088693febba7019e28d39c30
Parents: 247002d
Author: seaok <[email protected]>
Authored: Fri Mar 18 15:00:44 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Sat Apr 2 21:35:39 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/math_addr.h | 6 +++---
 include/singa/utils/math_blob.h | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ade7d76/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index 524e13e..4a05cfd 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -234,13 +234,13 @@ void gpu_e_f(const int n, const Dtype * A, const Dtype * B, Dtype * C) {
 }
 
 template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype * A, const Dtype alpha, Dtype * B) {
+void gpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
   Op::CudaMap(alpha, A, B, n);
 }
 
 template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype * A, const Dtype * B,
-    const Dtype alpha, const Dtype beta, Dtype * C) {
+void gpu_e_f(const int n, const Dtype alpha, const Dtype beta,
+    const Dtype * A, const Dtype * B, Dtype * C) {
   Op::CudaMap(alpha, beta, A, B, C, n);
 }
 // element-wise generalized operation defined in Op

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ade7d76/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 35985f1..50da1f0 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -258,7 +258,7 @@ void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) {
   } else {
 #ifdef USE_GPU
     gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
-        m, n, 1, 1, 0, false, false, C->mutable_gpu_data());
+        m, n, 1, Dtype(1), Dtype(0), false, false, C->mutable_gpu_data());
 #else
     NO_GPU;
 #endif  // USE_GPU
@@ -321,7 +321,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
     cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
-    gpu_e_f<Op>(A.count(), A.gpu_data(), alpha, B->mutable_gpu_data());
+    gpu_e_f<Op>(A.count(), alpha, A.gpu_data(), B->mutable_gpu_data());
 #else
     NO_GPU;
 #endif  // USE_GPU
@@ -491,8 +491,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
       B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
-    singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
-        m, n, n);
+    gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m, n, 1,
+        alpha, beta, false, false, B->mutable_gpu_data());
 #else
     NO_GPU;
 #endif  // USE_GPU
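
A note on the math_addr.h change: the reordered gpu_e_f overloads now match
the cpu_e_f convention visible in the Map() hunk above, where scalar
coefficients come before the data pointers. A minimal CPU-side sketch of
that convention; Scale and e_f below are illustrative names, not SINGA code:

    // Example op: B[i] = alpha * A[i].
    struct Scale {
      template <typename Dtype>
      static void Map(Dtype alpha, Dtype a, Dtype* b) { *b = alpha * a; }
    };

    // Mirrors the unified order: count, scalars, input pointers, output.
    template <typename Op, typename Dtype>
    void e_f(const int n, const Dtype alpha, const Dtype* A, Dtype* B) {
      for (int i = 0; i < n; ++i)
        Op::Map(alpha, A[i], B + i);
    }

    int main() {
      float a[3] = {1.0f, 2.0f, 3.0f}, b[3];
      e_f<Scale>(3, 2.0f, a, b);  // b becomes {2, 4, 6}
      return 0;
    }

With a single argument order, the blob-level wrappers can forward
(count, alpha, data, out) unchanged to either the CPU or the GPU path.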
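On the OuterProduct() change: if gpu_gemm deduces Dtype from both its
pointer and scalar arguments (an assumption based on the diff, not a
verified signature), the bare literals 1 and 0 deduce as int and conflict
with the float deduced from the pointers; wrapping them as Dtype(1) and
Dtype(0) makes every argument agree. A self-contained sketch using a
hypothetical gemm_like template:

    // Hypothetical stand-in that, like gpu_gemm, deduces Dtype from
    // both a pointer argument and the scalar coefficients.
    template <typename Dtype>
    void gemm_like(const Dtype* A, Dtype alpha, Dtype beta, Dtype* C) {
      C[0] = alpha * A[0] + beta * C[0];
    }

    int main() {
      float a[1] = {2.0f}, c[1] = {3.0f};
      // gemm_like(a, 1, 0, c);           // error: Dtype deduced as both float and int
      gemm_like(a, float(1), float(0), c);  // OK: all arguments agree on float
      return 0;
    }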
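On the MVAddCol() fix: the old GPU path called the row kernel
singa_gpu_add_vec_row() and dropped alpha and beta entirely. The new path
expresses the operation as a rank-1 GEMM against an all-ones vector
(`one` is presumably a length-n blob of ones prepared earlier in the
function), i.e. B = alpha * A * ones^T + beta * B, which scales B and adds
the column vector A to every column. A CPU reference of that formula,
assuming row-major m-by-n storage; mv_add_col_ref is an illustrative name,
not SINGA code:

    #include <cstdio>
    #include <vector>

    // B[i][j] = alpha * A[i] * 1 + beta * B[i][j]: adds A to each column.
    void mv_add_col_ref(float alpha, float beta, const std::vector<float>& A,
                        int m, int n, std::vector<float>* B) {
      for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j)
          (*B)[i * n + j] = alpha * A[i] + beta * (*B)[i * n + j];
    }

    int main() {
      std::vector<float> A = {1.0f, 2.0f};  // column vector, m = 2
      std::vector<float> B(6, 0.0f);        // 2x3 matrix of zeros
      mv_add_col_ref(1.0f, 1.0f, A, 2, 3, &B);
      for (float v : B) std::printf("%g ", v);  // prints: 1 1 1 2 2 2
      return 0;
    }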
