SINGA-80 New Blob Level and Address Level Math Operation Interface

---

Blob-level CPU implementation.
TODO: random functions.
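
For context, a minimal usage sketch (not part of this commit) of how the new Blob-level interface below is meant to be called. GEMM and MMDot and their signatures come from math_blob.cc in this commit; Blob<float>::Reshape, mutable_cpu_data() and the singa::cpu enumerator are assumptions based on how the new code uses them.

    #include <vector>
    #include "singa/blob/math_blob.h"   // GEMM, MMDot (added by this commit)

    int main() {
      using singa::Blob;
      // Assumed Blob API: Reshape(std::vector<int>) sets the shape,
      // mutable_cpu_data() exposes the underlying float array.
      Blob<float> A, B, C;
      A.Reshape(std::vector<int>{2, 3});
      B.Reshape(std::vector<int>{3, 4});
      C.Reshape(std::vector<int>{2, 4});
      for (int i = 0; i < 6; ++i)  A.mutable_cpu_data()[i] = 1.0f;
      for (int i = 0; i < 12; ++i) B.mutable_cpu_data()[i] = 0.5f;

      // C = 1.0 * A * B + 0.0 * C, dispatched to cpu_gemm on the CPU.
      singa::GEMM(singa::cpu, A, B, &C, 1.0f, 0.0f);
      // Shorthand for alpha = 1, beta = 0.
      singa::MMDot(singa::cpu, A, B, &C);
      return 0;
    }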


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b99de6c8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b99de6c8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b99de6c8

Branch: refs/heads/master
Commit: b99de6c87882473ba7cfc72b5e67f54b13574752
Parents: 06cdf36
Author: jinyangturbo <pku.tu...@gmail.com>
Authored: Thu Oct 22 01:07:05 2015 -0700
Committer: Wei Wang <wang...@comp.nus.edu.sg>
Committed: Mon Nov 9 17:04:48 2015 +0800

----------------------------------------------------------------------
 src/blob/math_addr.cc |  51 ++++++++++++
 src/blob/math_blob.cc | 193 +++++++++++++++++++++++++++++++++++++++++++++
 src/blob/test.cc      | 165 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 409 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b99de6c8/src/blob/math_addr.cc
----------------------------------------------------------------------
diff --git a/src/blob/math_addr.cc b/src/blob/math_addr.cc
new file mode 100644
index 0000000..f28fdcb
--- /dev/null
+++ b/src/blob/math_addr.cc
@@ -0,0 +1,51 @@
+extern "C"
+{
+   #include <cblas.h>
+}
+
+#include "singa/blob/math_addr.h"
+#include "singa/blob/singa_op.h"
+
+namespace singa{
+
+const float * cpu_uni_vec(const int n)
+{
+       float * res = new float[n];
+       for(int i = 0; i < n; i++)
+               res[i] = 1.0;
+       return res;
+}
+
+void cpu_gemm(const float * A, const float * B, const int m, const int n, const int k, const float alpha, const float beta, const bool TranA, const bool TranB, float * C)
+{
+       int lda, ldb;
+       CBLAS_TRANSPOSE tA, tB;
+       lda = TranA ? m : k;
+       ldb = TranB ? k : n;
+       tA = TranA ? CblasTrans : CblasNoTrans;
+       tB = TranB ? CblasTrans : CblasNoTrans;
+       cblas_sgemm(CblasRowMajor, tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, n);
+}
+
+void cpu_gemv(const float * A, const float * B, const int m, const int n, const float alpha, const float beta, const bool TranA, float * C)
+{
+       CBLAS_TRANSPOSE tA;
+       tA = TranA ? CblasTrans : CblasNoTrans;
+       cblas_sgemv(CblasRowMajor, tA, m, n, alpha, A, n, B, 1, beta, C, 1);
+}
+
+void cpu_axpy(const float * A, const int n, const float alpha, float * B)
+{
+       cblas_saxpy(n, alpha, A, 1, B, 1);
+}
+
+float cpu_dot(const float * A, const float * B, const int n)
+{
+       float sum = 0;
+       for(int i = 0 ; i < n ; i++)
+               sum += A[i]*B[i];
+       return sum;
+}
+
+
+} // namespace singa
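
For reference, a small sketch (not part of the commit) of calling the address-level routines above directly on raw row-major arrays; this is the same pattern test.cc below exercises. The values are arbitrary illustration data.

    #include <iostream>
    #include "singa/blob/math_addr.h"   // cpu_gemm, cpu_gemv, cpu_dot (this commit)

    int main() {
      // Row-major storage: A is 2x3, B is 3x2, C is 2x2.
      float A[2 * 3] = {1, 2, 3, 4, 5, 6};
      float B[3 * 2] = {1, 0, 0, 1, 1, 1};
      float C[2 * 2] = {0, 0, 0, 0};
      // C = 1.0 * A * B + 0.0 * C with m = 2, n = 2, k = 3, no transposes.
      singa::cpu_gemm(A, B, 2, 2, 3, 1.0f, 0.0f, false, false, C);

      float x[3] = {1, 1, 1};
      float y[2] = {0, 0};
      // y = 1.0 * A * x + 0.0 * y, A treated as a 2x3 matrix (not transposed).
      singa::cpu_gemv(A, x, 2, 3, 1.0f, 0.0f, false, y);

      std::cout << singa::cpu_dot(x, x, 3) << std::endl;   // prints 3
      return 0;
    }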

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b99de6c8/src/blob/math_blob.cc
----------------------------------------------------------------------
diff --git a/src/blob/math_blob.cc b/src/blob/math_blob.cc
new file mode 100644
index 0000000..ff81667
--- /dev/null
+++ b/src/blob/math_blob.cc
@@ -0,0 +1,193 @@
+#include "singa/blob/math_blob.h"
+
+namespace singa {
+
+/**********************************************************************************/
+// shape_check function
+
+int get_size(const std::vector<int>& shape)
+{
+  int sum = 1;
+  for(unsigned int i = 0; i < shape.size(); i++) sum *= shape[i];
+  return sum; 
+}
+
+/**********************************************************************************/
+// class1 matrix operation
+
+
+void GEMM(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C, float alpha, float beta)
+{
+       if(xpu == cpu)
+       {
+         if(check_shape_mmm(A, B, *C))
+         {
+           int m = C->shape().at(0);
+           int n = C->shape().at(1);
+           int k = A.isTranspose() ? A.shape().at(0) : A.shape().at(1);
+           bool TranA = A.isTranspose();
+           bool TranB = B.isTranspose();
+           cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, k, alpha, beta, TranA, TranB, C->mutable_cpu_data());
+         }
+         else{
+         // report errors here
+         }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }
+}
+// C = alpha*A*B + beta*C; A, B and C are matrices
+
+ 
+void MMDot(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C)
+{
+       GEMM(xpu, A, B, C, 1, 0);
+}
+// A, B and C are matrices
+
+
+void MVDot(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C)
+{
+       if(xpu == cpu)
+       {
+               if(check_shape_mvv(A, B, *C))
+               {
+                       int m = B.shape().at(0);
+                       int n = C->shape().at(0);
+                       bool TranA = A.isTranspose();
+                       cpu_gemv(A.cpu_data(), B.cpu_data(), m, n, 1, 0, TranA, C->mutable_cpu_data());
+               }
+               else{
+                       // report errors here
+               }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }
+       
+}
+// A is a matrix, B and C are vectors
+
+ 
+void VVDot(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C)
+{
+       if(xpu == cpu)
+       {
+               if(check_shape_vvm(A, B, *C))
+               {
+                       int m = C->shape().at(0);
+                       int n = C->shape().at(1);
+                       cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, 1, 0, false, false, C->mutable_cpu_data());
+               }
+               else{
+               // report errors here
+               }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }
+}
+// C is a matrix, A and B are vectors
+
+ 
+float VVdot(XPU xpu, const Blob<float> & A, const Blob<float> & B)
+{
+       float res = 0;
+       if(xpu == cpu)
+       {
+               if(check_shape_equal(A, B, B))
+               {
+                       int n = get_size(A.shape());
+                       res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
+               }
+               else{
+               // report errors here
+               }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }
+       return res;
+}
+//A and B are vectors
+
+void AXPY(XPU xpu, const Blob<float> & A, Blob<float> * B, float alpha)
+{
+       if(xpu == cpu)
+       {
+               if(check_shape_equal(A, *B, *B))
+               {
+                       cpu_axpy(A.cpu_data(), get_size(A.shape()), alpha, B->mutable_cpu_data());
+               }
+               else{
+               // report errors here
+               }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }
+}
+// element-wise operation: Bi = alpha*Ai+Bi  A and B should have the same size
+
+inline void Repmat(XPU xpu, const Blob<float> & A, Blob<float> * B)
+{
+       MVAdd(xpu, A, B, 1, 0);
+}
+// A is a vector, B is a matrix; set each row of B to A
+
+void MVAdd(XPU xpu, const Blob<float> & A, Blob<float> * B, float alpha, float beta)
+{
+       if(xpu == cpu)
+       {
+               if(check_shape_mv(*B, A))
+               {
+                       int m = get_size(A.shape());
+                       int n = get_size(B->shape()) / m;
+                       const float * univ = cpu_uni_vec(n);
+                       cpu_gemm(A.cpu_data(), univ, m, n, 1, alpha, beta, false, false, B->mutable_cpu_data());
+                       delete[] univ;
+               }
+               else{
+               // report errors here
+               }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }       
+}
+// A is a vector, B is a matrix; Bij = alpha*Ai + beta*Bij
+// uses gemm, which is faster than the general expand_f
+
+void MVSum(XPU xpu, const Blob<float> & A, Blob<float> * B, float alpha, float beta)
+{
+       if(xpu == cpu)
+       {
+               if(check_shape_mv(A, *B))
+               {
+                       int m = get_size(B->shape());
+                       int n = get_size(A.shape()) / m;
+                       const float * univ = cpu_uni_vec(n);
+                       cpu_gemm(A.cpu_data(), univ, m, 1, n, alpha, beta, false, false, B->mutable_cpu_data());
+                       delete[] univ;
+               }
+               else{
+               // report errors here
+               }
+       }
+       if(xpu == gpu)
+       {
+         //gpu part
+       }
+}
+// B is a vector, A is a matrix; Bi = alpha * sum_j(Aij) + beta*Bi
+// uses gemm, which is faster than the general reduce_f
+
+} // namespace singa
+
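
The matrix-vector helpers above (Repmat, MVAdd, MVSum) reuse cpu_gemm with a vector of ones instead of a hand-written loop. Below is a short sketch (not part of the commit) of the intended semantics; as in the first sketch, Blob<float>::Reshape and mutable_cpu_data() are assumptions, and check_shape_mv is assumed to accept these shapes.

    #include <vector>
    #include "singa/blob/math_blob.h"   // MVAdd, MVSum (this commit)

    int main() {
      using singa::Blob;
      Blob<float> v, M, s;
      v.Reshape(std::vector<int>{3});      // vector of length 3
      M.Reshape(std::vector<int>{3, 4});   // 3x4 matrix
      s.Reshape(std::vector<int>{3});      // will hold per-row sums
      for (int i = 0; i < 3; ++i)  v.mutable_cpu_data()[i] = static_cast<float>(i);
      for (int i = 0; i < 12; ++i) M.mutable_cpu_data()[i] = 1.0f;

      // M_ij = 1.0 * v_i + 1.0 * M_ij  (rank-1 update through cpu_gemm).
      singa::MVAdd(singa::cpu, v, &M, 1.0f, 1.0f);

      // s_i = 1.0 * sum_j M_ij + 0.0 * s_i.
      singa::MVSum(singa::cpu, M, &s, 1.0f, 0.0f);
      return 0;
    }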

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b99de6c8/src/blob/test.cc
----------------------------------------------------------------------
diff --git a/src/blob/test.cc b/src/blob/test.cc
new file mode 100644
index 0000000..d13ed5e
--- /dev/null
+++ b/src/blob/test.cc
@@ -0,0 +1,165 @@
+#include <iostream>
+
+#include "singa_op.h"
+#include "math_addr.h"
+
+using namespace std;
+
+void test_gemm1()
+{
+            float A[3][2] = {};
+            float B[3][2] = {};
+            float C[2][2] = {};
+            for(int i = 0; i < 3; i++)
+                for(int j = 0; j < 2; j++)
+                {
+                A[i][j] = i+j;
+                B[i][j] = i+j - i*j;
+                }
+            cpu_gemm(A[0], B[0], 2, 2, 3 , 1, 0, true, false, C[0]);
+            float D[2][2] = {};
+            for(int i = 0; i < 2; i++)
+                for(int j = 0; j < 2; j++)
+                {
+                    D[i][j] = 0;
+                    for(int k = 0; k < 3; k++)
+                    D[i][j] += A[k][i]*B[k][j];
+                }
+            for(int i = 0; i < 2; i++)
+                for(int j = 0; j < 2; j++)
+                {
+                cout<<C[i][j] - D[i][j]<<endl;
+                }
+}
+
+
+void test_gemm2()
+{
+            float A[2][3] = {};
+            float B[3][2] = {};
+            float C[2][2] = {};
+            for(int i = 0; i < 3; i++)
+                for(int j = 0; j < 2; j++)
+                {
+                A[j][i] = i-j;
+                B[i][j] = i+j + i*j;
+                }
+            cpu_gemm(A[0], B[0], 2, 2, 3 , 1, 0, false, false, C[0]);
+            float D[2][2] = {};
+            for(int i = 0; i < 2; i++)
+                for(int j = 0; j < 2; j++)
+                {
+                    D[i][j] = 0;
+                    for(int k = 0; k < 3; k++)
+                    D[i][j] += A[i][k]*B[k][j];
+                }
+            for(int i = 0; i < 2; i++)
+                for(int j = 0; j < 2; j++)
+                {
+                cout<<C[i][j] - D[i][j]<<endl;
+                }
+}
+
+
+void test_gemv()
+{
+        float A[4][3] = {};
+        float B[4]= {};
+        float C[3] = {};
+        float D[3] = {};
+        for(int i = 0; i < 4; i++)
+        {
+            for(int j = 0; j < 3; j++)
+                    {
+                    A[j][i] = i-j + i*j;
+                    }
+        }
+        for(int i = 0; i < 4; i++)B[i] = i;
+        for(int i = 0; i < 3; i++)C[i] = 10;
+        cpu_gemv(A[0], B, 4, 3, 1, 1, true, C);
+        for(int i = 0; i < 3; i++)
+                for(int j = 0; j < 4; j++)
+                {
+                    D[i] += A[j][i]*B[j];
+                }
+        for(int i = 0; i < 3; i++)cout<<C[i] - D[i] - 10<<endl;
+}
+
+void test_axpy()
+{
+        float A[4][3] = {};
+        float C[4][3] = {};
+        float B[3][4] = {};
+        float D[3][4] = {};
+        for(int i = 0; i < 4; i++)
+        {
+            for(int j = 0; j < 3; j++)
+                    {
+                    A[i][j] = i-j + i*j;
+                    B[j][i] = i-j + i*j;
+                    C[i][j] = A[i][j];
+                    D[j][i] = B[j][i];
+                    }
+        }
+        cpu_axpy(A[0], 12, 2, B[0]);
+        for(int i = 0; i < 12; i++)D[0][i] += 2*C[0][i];
+        for(int i = 0; i < 3; i++)
+        {
+            for(int j = 0; j < 4; j++)
+                    {
+                    cout<<B[i][j] - D[i][j]<<endl;
+                    }
+        }
+}
+
+void test_eop()
+{
+        float A[10] = {};
+        float B[10] = {};
+        float C[10] = {};
+        float D[10] = {};
+        float O[10] = {};
+        for(int i = 0; i < 10; i++)
+        {
+            A[i] = i;
+            B[i] = -i;
+            C[i] = i;
+        }
+        cpu_e_f<op::Set>(5, 15, O);
+        for(int i = 0; i < 5; i++)cout<<O[i] - 15<<endl;
+        for(int i = 5; i < 10; i++)cout<<O[i]<<endl;
+        cpu_e_f<op::Scale>(10, C, 2, C);
+        for(int i = 0; i < 10; i++)cout<<C[i] - 2* i<<endl;
+        cpu_e_f<op::Add>(10, A, B, 0, 0, O);
+        for(int i = 0; i < 10; i++)cout<<O[i]<<endl;
+}
+
+void test_exrd()
+{
+        float A[3][10] = {};
+        float B[3] = {};
+        for(int i = 0; i < 3; i++)
+            for(int j = 0; j < 10; j++)
+            {
+                A[i][j] = (i + 1)*j;
+            }
+        cpu_reduce_f<op::Sum>(A[0], 3, 10, B);
+        for(int i = 0; i < 3; i++) B[i] -= 45*(i+1);
+        for(int i = 0; i < 3; i++)cout<<B[i]<<endl;
+        cpu_expand_f<op::Repmat>(B, 3, 10, A[0]);
+        cpu_reduce_f<op::Sum>(A[0], 3, 10, B);
+        for(int i = 0; i < 3; i++)cout<<B[i]<<endl;
+}
+
+int main()
+{
+    test_gemm1();
+    test_gemm2();
+    test_gemv();
+    test_axpy();
+    test_eop();
+    test_exrd();
+    return 0;
+}
+
+
