SINGA-80 New Blob Level and Address Level Math Operation Interface

Clean the files with cpplint. Add a fatal log for places where the GPU is
needed but the code is not compiled with GPU support. A few TODOs are left
in math_blob.h.
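For reference, the pattern this commit applies throughout math_blob.h is the one visible in the VVDot hunk below: dispatch on the calling thread's device id, and make the GPU branch fail loudly via the NO_GPU fatal-log macro when the binary is built without USE_GPU. A minimal sketch assembled from that hunk (the declarations of res and n are reconstructed for context and are not part of the diff):

template<typename Dtype>
Dtype VVDot(const Blob<Dtype>& A, const Blob<Dtype>& B) {
  Dtype res = 0;       // reconstructed for context
  int n = A.count();   // reconstructed: vector length
  auto context = Singleton<Context>::Instance();
  int device = context->device_id(std::this_thread::get_id());
  if (device < 0) {
    // CPU path; note the new length-first (n, A, B) order for cpu_dot.
    res = cpu_dot(n, A.cpu_data(), B.cpu_data());
  } else {
#ifdef USE_GPU
    res = gpu_dot(context->cublas_handle(device), n, A.gpu_data(),
        B.gpu_data());
#else
    NO_GPU;  // fatal log: GPU path requested but USE_GPU not compiled in
#endif  // USE_GPU
  }
  return res;
}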
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d452c1fb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d452c1fb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d452c1fb

Branch: refs/heads/master
Commit: d452c1fb4128ef8a90198100033160826290b0c3
Parents: 8ade7d7
Author: Wei Wang <[email protected]>
Authored: Mon Apr 4 11:12:32 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Mon Apr 4 11:12:32 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/blob.h      |  2 +-
 include/singa/utils/math_addr.h |  8 ++---
 include/singa/utils/math_blob.h | 14 ++++----
 src/test/test_math.cc           | 70 ++++++++++++++++++------------------
 4 files changed, 48 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h
index 9defeac..1a0a592 100644
--- a/include/singa/utils/blob.h
+++ b/include/singa/utils/blob.h
@@ -281,7 +281,7 @@ class Blob {
     if (transpose() != other->transpose()) return false;
     if (count() != other->count()) return false;
     if (shape().size() != other->shape().size()) return false;
-    for (int i = 0; i < shape().size(); i++) {
+    for (unsigned int i = 0; i < shape().size(); i++) {
       if (shape(i) != other->shape(i)) return false;
     }
     const Dtype * a = cpu_data();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index 4a05cfd..cf1d227 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -78,7 +78,7 @@ void cpu_copy(const int n, const Dtype* A, Dtype *B) {
 }
 
 template<typename Dtype>
-Dtype cpu_dot(const Dtype * A, const Dtype * B, const int n) {
+Dtype cpu_dot(const int n, const Dtype * A, const Dtype * B) {
   Dtype sum = 0;
   for (int i = 0 ; i < n ; i++)
     sum += A[i] * B[i];
@@ -210,8 +210,8 @@ void gpu_scale(cublasHandle_t handle, const int n, const Dtype alpha,
 }
 
 template<typename Dtype>
-Dtype gpu_dot(cublasHandle_t handle, const Dtype * A, const Dtype * B,
-    const int n) {
+Dtype gpu_dot(cublasHandle_t handle, const int n, const Dtype * A,
+    const Dtype * B) {
   Dtype result = 0.0;
   cublasSdot(handle, n, A, 1, B, 1, &result);
   return result;
@@ -240,7 +240,7 @@ void gpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
 
 template<typename Op, typename Dtype>
 void gpu_e_f(const int n, const Dtype alpha, const Dtype beta,
-      const Dtype * A, const Dtype * B, Dtype * C) {
+    const Dtype * A, const Dtype * B, Dtype * C) {
   Op::CudaMap(alpha, beta, A, B, C, n);
 }
 // element-wise generalized operation defined in Op

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 50da1f0..abe7722 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -223,11 +223,11 @@ Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) {
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
   if (device < 0) {
-    res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
+    res = cpu_dot(n, A.cpu_data(), B.cpu_data());
   } else {
 #ifdef USE_GPU
-    res = gpu_dot(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
-        n);
+    res = gpu_dot(context->cublas_handle(device), n, A.gpu_data(),
+        B.gpu_data());
 #else
     NO_GPU;
 #endif  // USE_GPU
@@ -302,8 +302,9 @@ void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) {
     cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
-    // gpu part
     gpu_e_f<Op>(A.count(), A.gpu_data(), B.gpu_data(), C->mutable_gpu_data());
+#else
+    NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -491,8 +492,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
         B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
-    gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m, n, 1,
-        alpha, beta, false, false, B->mutable_gpu_data());
+    gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m,
+        n, 1, alpha, beta, false, false, B->mutable_gpu_data());
 #else
     NO_GPU;
 #endif  // USE_GPU
@@ -737,6 +738,7 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
     cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
         B->mutable_cpu_data());
   } else {
+    // TODO(wangwei) implement the GPU version.
     NO_GPU;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 6bb6001..9830703 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -30,7 +30,7 @@
 
 #ifdef USE_GPU
 #include <cuda_runtime.h>
-#include "cublas_v2.h"
+#include <cublas_v2.h>
 #endif
 
 using namespace singa;
@@ -64,8 +64,8 @@ TEST(MathBlobTest, TestGEMV) {
   float AT[5][5] = {};
   float B[5] = {};
   float Res[5] = {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
     }
@@ -88,7 +88,7 @@ TEST(MathBlobTest, TestGEMV) {
   BlobATB->set_cpu_data(Res);
 
   for (int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+    for (int j = 0; j < 5; j++) {
       Res[i] += 2*A[i][j] * B[j];
     }
   }
@@ -107,8 +107,8 @@ TEST(MathBlobTest, TestMVDot) {
   float AT[5][5] = {};
   float B[5] = {};
   float Res[5] = {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
     }
@@ -131,7 +131,7 @@ TEST(MathBlobTest, TestMVDot) {
   BlobATB->set_cpu_data(Res);
 
   for (int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+    for (int j = 0; j < 5; j++) {
       Res[i] += A[i][j] * B[j];
     }
   }
@@ -156,8 +156,8 @@ TEST(MathBlobTest, TestGEMM) {
   float B[5][5]= {};
   float BT[5][5]= {};
   float Res[5][5]= {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
       B[i][j] = - i * j + i * i - j * j;
@@ -214,8 +214,8 @@ TEST(MathBlobTest, TestMMDot) {
   float B[5][5]= {};
   float BT[5][5]= {};
   float Res[5][5]= {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
       B[i][j] = - i * j + i * i - j * j;
@@ -292,8 +292,8 @@ TEST(MathBlobTest, TestOuterProduct) {
     A[i] = i * i - 5* (i%2);
     B[i] = 2* i * i - 3* (i%4);
   }
-  for(int i = 0; i < 10; i++) {
-    for(int j = 0; j < 10; j++) {
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
       AB[i][j] = A[i]*B[j];
     }
   }
@@ -392,9 +392,9 @@ TEST(MathBlobTest, TestMVAddCol) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = i * j + i - j;
       BT[j][i] = i * j + i - j;
     }
@@ -405,8 +405,8 @@ TEST(MathBlobTest, TestMVAddCol) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
-    for(int j = 0; j < 10; j++) {
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = 2.0 * A[i] + 3.0 * B[i][j];
       BT[j][i] = 2.0 * A[i] + 3.0 * BT[j][i];
     }
@@ -433,9 +433,9 @@ TEST(MathBlobTest, TestMVAddRow) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = i * j + i - j;
       BT[j][i] = i * j + i - j;
     }
@@ -446,8 +446,8 @@ TEST(MathBlobTest, TestMVAddRow) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
-    for(int j = 0; j < 10; j++) {
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
       B[j][i] = 2.0 * A[i] + 3.0 * B[j][i];
       BT[i][j] = 2.0 * A[i] + 3.0 * BT[i][j];
     }
@@ -474,9 +474,9 @@ TEST(MathBlobTest, TestRepmatCol) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = A[i];
       BT[j][i] = A[i];
     }
@@ -506,9 +506,9 @@ TEST(MathBlobTest, TestRepmatRow) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[j][i] = A[i];
       BT[i][j] = A[i];
     }
@@ -538,9 +538,9 @@ TEST(MathBlobTest, TestMVSumCol) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = i * j + i - j;
       BT[j][i] = i * j + i - j;
     }
@@ -552,9 +552,9 @@ TEST(MathBlobTest, TestMVSumCol) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] *= 2.0;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       A[i] += 3.0 * B[i][j];
     }
   }
@@ -577,9 +577,9 @@ TEST(MathBlobTest, TestMVSumRow) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[j][i] = i * j + i - j;
       BT[i][j] = i * j + i - j;
     }
@@ -591,9 +591,9 @@ TEST(MathBlobTest, TestMVSumRow) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] *= 2.0;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       A[i] += 3.0 * B[j][i];
     }
   }
@@ -608,7 +608,7 @@ TEST(MathBlobTest, TestMVSumRow) {
 
 TEST(MathBlobTest, TestASum) {
   float A[10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = ((i % 3) -1) * i;
   }
 
@@ -888,7 +888,7 @@ TEST(MathTest, TestDotGPU) {
   cudaMemcpy(B_gpu, B, 12*sizeof(float), cudaMemcpyHostToDevice);
 
   auto context = Singleton<Context>::Instance();
   context->SetupDevice(std::this_thread::get_id(), 0);
-  float gpu_ret = gpu_dot<float>(context->cublas_handle(0), A_gpu, B_gpu, 12);
+  float gpu_ret = gpu_dot<float>(context->cublas_handle(0), 12, A_gpu, B_gpu);
 
   float cpu_ret = 0.0f;
   for (int i = 0; i < 12; i++) {
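Usage note: after this change both dot-product helpers take the length first, matching the BLAS-style (n, x, y) convention, so CPU and GPU call sites line up. A short host-side sketch of the new order (mirroring test_math.cc, which has "using namespace singa;"; the values are illustrative, and the GPU line assumes device buffers A_gpu, B_gpu and a cuBLAS handle already exist):

float A[12], B[12];
for (int i = 0; i < 12; i++) {
  A[i] = i - 1.0f;
  B[i] = i + 1.0f;
}
float cpu_ret = cpu_dot<float>(12, A, B);  // length comes first now
#ifdef USE_GPU
float gpu_ret = gpu_dot<float>(handle, 12, A_gpu, B_gpu);
#endif  // USE_GPU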
