SINGA-80 New Blob Level and Address Level Math Operation Interface

Clean the files with cpplint.
Add a fatal log for places where a GPU is needed but the code was not compiled
with GPU support.
There are a few TODOs left in math_blob.h.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d452c1fb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d452c1fb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d452c1fb

Branch: refs/heads/master
Commit: d452c1fb4128ef8a90198100033160826290b0c3
Parents: 8ade7d7
Author: Wei Wang <[email protected]>
Authored: Mon Apr 4 11:12:32 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Mon Apr 4 11:12:32 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/blob.h      |  2 +-
 include/singa/utils/math_addr.h |  8 ++---
 include/singa/utils/math_blob.h | 14 ++++----
 src/test/test_math.cc           | 70 ++++++++++++++++++------------------
 4 files changed, 48 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h
index 9defeac..1a0a592 100644
--- a/include/singa/utils/blob.h
+++ b/include/singa/utils/blob.h
@@ -281,7 +281,7 @@ class Blob {
     if (transpose() != other->transpose()) return false;
     if (count() != other->count()) return false;
     if (shape().size() != other->shape().size()) return false;
-    for (int i = 0; i < shape().size(); i++) {
+    for (unsigned int i = 0; i < shape().size(); i++) {
       if (shape(i) != other->shape(i)) return false;
     }
     const Dtype * a = cpu_data();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index 4a05cfd..cf1d227 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -78,7 +78,7 @@ void cpu_copy(const int n, const Dtype* A, Dtype *B) {
 }
 
 template<typename Dtype>
-Dtype cpu_dot(const Dtype * A, const Dtype * B, const int n) {
+Dtype cpu_dot(const int n, const Dtype * A, const Dtype * B) {
   Dtype sum = 0;
   for (int i = 0 ; i < n ; i++)
     sum += A[i] * B[i];
@@ -210,8 +210,8 @@ void gpu_scale(cublasHandle_t handle, const int n, const 
Dtype alpha,
 }
 
 template<typename Dtype>
-Dtype gpu_dot(cublasHandle_t handle, const Dtype * A, const Dtype * B,
-    const int n) {
+Dtype gpu_dot(cublasHandle_t handle, const int n, const Dtype * A,
+    const Dtype * B) {
   Dtype result = 0.0;
   cublasSdot(handle, n, A, 1, B, 1, &result);
   return result;
@@ -240,7 +240,7 @@ void gpu_e_f(const int n, const Dtype alpha, const Dtype * 
A, Dtype * B) {
 
 template<typename Op, typename Dtype>
 void gpu_e_f(const int n, const Dtype alpha, const Dtype beta,
-       const Dtype * A, const Dtype * B, Dtype * C) {
+  const Dtype * A, const Dtype * B, Dtype * C) {
   Op::CudaMap(alpha, beta, A, B, C, n);
 }
 // element-wise generalized operation defined in Op

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 50da1f0..abe7722 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -223,11 +223,11 @@ Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) 
{
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
   if (device < 0) {
-    res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
+    res = cpu_dot(n, A.cpu_data(), B.cpu_data());
   } else {
 #ifdef USE_GPU
-    res = gpu_dot(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
-        n);
+    res = gpu_dot(context->cublas_handle(device), n, A.gpu_data(),
+        B.gpu_data());
 #else
     NO_GPU;
 #endif  // USE_GPU
@@ -302,8 +302,9 @@ void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, 
Blob<Dtype> * C) {
     cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
-    // gpu part
     gpu_e_f<Op>(A.count(), A.gpu_data(), B.gpu_data(), C->mutable_gpu_data());
+#else
+    NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -491,8 +492,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & 
A, Blob<Dtype> * B) {
           B->mutable_cpu_data());
     } else {
 #ifdef USE_GPU
-      gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), 
m, n, 1,
-                 alpha, beta, false, false, B->mutable_gpu_data());
+      gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m,
+          n, 1, alpha, beta, false, false, B->mutable_gpu_data());
 #else
       NO_GPU;
 #endif  // USE_GPU
@@ -737,6 +738,7 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, 
Blob<Dtype>* B) {
     cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
       B->mutable_cpu_data());
   } else {
+    // TODO(wangwei) implement the GPU version.
     NO_GPU;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 6bb6001..9830703 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -30,7 +30,7 @@
 
 #ifdef USE_GPU
 #include <cuda_runtime.h>
-#include "cublas_v2.h"
+#include <cublas_v2.h>
 #endif
 
 using namespace singa;
@@ -64,8 +64,8 @@ TEST(MathBlobTest, TestGEMV) {
   float AT[5][5] = {};
   float B[5] = {};
   float Res[5] = {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
     }
@@ -88,7 +88,7 @@ TEST(MathBlobTest, TestGEMV) {
   BlobATB->set_cpu_data(Res);
 
   for (int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+    for (int j = 0; j < 5; j++) {
       Res[i] += 2*A[i][j] * B[j];
     }
   }
@@ -107,8 +107,8 @@ TEST(MathBlobTest, TestMVDot) {
   float AT[5][5] = {};
   float B[5] = {};
   float Res[5] = {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
     }
@@ -131,7 +131,7 @@ TEST(MathBlobTest, TestMVDot) {
   BlobATB->set_cpu_data(Res);
 
   for (int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+    for (int j = 0; j < 5; j++) {
       Res[i] += A[i][j] * B[j];
     }
   }
@@ -156,8 +156,8 @@ TEST(MathBlobTest, TestGEMM) {
   float B[5][5]= {};
   float BT[5][5]= {};
   float Res[5][5]= {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
       B[i][j] = - i * j + i * i - j * j;
@@ -214,8 +214,8 @@ TEST(MathBlobTest, TestMMDot) {
   float B[5][5]= {};
   float BT[5][5]= {};
   float Res[5][5]= {};
-  for(int i = 0; i < 5; i++) {
-    for(int j = 0; j < 5; j++) {
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
       A[i][j] = i * j + i - j;
       AT[j][i] = i * j + i - j;
       B[i][j] = - i * j + i * i - j * j;
@@ -292,8 +292,8 @@ TEST(MathBlobTest, TestOuterProduct) {
     A[i] = i * i - 5* (i%2);
     B[i] = 2* i * i - 3* (i%4);
   }
-  for(int i = 0; i < 10; i++) {
-    for(int j = 0; j < 10; j++) {
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
       AB[i][j] = A[i]*B[j];
     }
   }
@@ -392,9 +392,9 @@ TEST(MathBlobTest, TestMVAddCol) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = i * j + i - j;
       BT[j][i] = i * j + i - j;
     }
@@ -405,8 +405,8 @@ TEST(MathBlobTest, TestMVAddCol) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
-    for(int j = 0; j < 10; j++) {
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = 2.0 * A[i] + 3.0 * B[i][j];
       BT[j][i] = 2.0 * A[i] + 3.0 * BT[j][i];
     }
@@ -433,9 +433,9 @@ TEST(MathBlobTest, TestMVAddRow) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = i * j + i - j;
       BT[j][i] = i * j + i - j;
     }
@@ -446,8 +446,8 @@ TEST(MathBlobTest, TestMVAddRow) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
-    for(int j = 0; j < 10; j++) {
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
       B[j][i] = 2.0 * A[i] + 3.0 * B[j][i];
       BT[i][j] = 2.0 * A[i] + 3.0 * BT[i][j];
     }
@@ -474,9 +474,9 @@ TEST(MathBlobTest, TestRepmatCol) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = A[i];
       BT[j][i] = A[i];
     }
@@ -506,9 +506,9 @@ TEST(MathBlobTest, TestRepmatRow) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[j][i] = A[i];
       BT[i][j] = A[i];
     }
@@ -538,9 +538,9 @@ TEST(MathBlobTest, TestMVSumCol) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[i][j] = i * j + i - j;
       BT[j][i] = i * j + i - j;
     }
@@ -552,9 +552,9 @@ TEST(MathBlobTest, TestMVSumCol) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] *= 2.0;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       A[i] += 3.0 * B[i][j];
     }
   }
@@ -577,9 +577,9 @@ TEST(MathBlobTest, TestMVSumRow) {
   float A[10] = {};
   float B[10][10] = {};
   float BT[10][10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = 5*i -2;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       B[j][i] = i * j + i - j;
       BT[i][j] = i * j + i - j;
     }
@@ -591,9 +591,9 @@ TEST(MathBlobTest, TestMVSumRow) {
   BlobBT->set_cpu_data(BT[0]);
   BlobBT->set_transpose(true);
 
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] *= 2.0;
-    for(int j = 0; j < 10; j++) {
+    for (int j = 0; j < 10; j++) {
       A[i] += 3.0 * B[j][i];
     }
   }
@@ -608,7 +608,7 @@ TEST(MathBlobTest, TestMVSumRow) {
 
 TEST(MathBlobTest, TestASum) {
   float A[10] = {};
-  for(int i = 0; i < 10; i++) {
+  for (int i = 0; i < 10; i++) {
     A[i] = ((i % 3) -1) * i;
   }
 
@@ -888,7 +888,7 @@ TEST(MathTest, TestDotGPU) {
   cudaMemcpy(B_gpu, B, 12*sizeof(float), cudaMemcpyHostToDevice);
   auto context = Singleton<Context>::Instance();
   context->SetupDevice(std::this_thread::get_id(), 0);
-  float gpu_ret = gpu_dot<float>(context->cublas_handle(0), A_gpu, B_gpu, 12);
+  float gpu_ret = gpu_dot<float>(context->cublas_handle(0), 12, A_gpu, B_gpu);
 
   float cpu_ret = 0.0f;
   for (int i = 0; i < 12; i++) {

Reply via email to