SINGA-80 New Blob Level and Address Level Math Operation Interface ---
Blob level cpu implementation todo: random functions Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b99de6c8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b99de6c8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b99de6c8 Branch: refs/heads/master Commit: b99de6c87882473ba7cfc72b5e67f54b13574752 Parents: 06cdf36 Author: jinyangturbo <pku.tu...@gmail.com> Authored: Thu Oct 22 01:07:05 2015 -0700 Committer: Wei Wang <wang...@comp.nus.edu.sg> Committed: Mon Nov 9 17:04:48 2015 +0800 ---------------------------------------------------------------------- src/blob/math_addr.cc | 51 ++++++++++++ src/blob/math_blob.cc | 193 +++++++++++++++++++++++++++++++++++++++++++++ src/blob/test.cc | 165 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 409 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b99de6c8/src/blob/math_addr.cc ---------------------------------------------------------------------- diff --git a/src/blob/math_addr.cc b/src/blob/math_addr.cc new file mode 100644 index 0000000..f28fdcb --- /dev/null +++ b/src/blob/math_addr.cc @@ -0,0 +1,51 @@ +extern "C" +{ + #include <cblas.h> +} + +#include "singa/blob/math_addr.h" +#include "singa/blob/singa_op.h" + +namespace singa{ + +const float * cpu_uni_vec(const int n) +{ + float * res = new float[n]; + for(int i = 0; i < n; i++) + res[i] = 1.0; + return res; +} + +void cpu_gemm(const float * A, const float * B, const int m, const int n, const int k, const float alpha, const float beta, const bool TranA, const bool TranB, float * C) +{ + int lda, ldb; + CBLAS_TRANSPOSE tA, tB; + lda = TranA ? m : k; + ldb = TranB ? k : n; + tA = TranA ? CblasTrans : CblasNoTrans; + tB = TranB ? CblasTrans : CblasNoTrans; + cblas_sgemm(CblasRowMajor, tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, n); +} + +void cpu_gemv(const float * A, const float * B, const int m, const int n, const float alpha, const float beta, const bool TranA, float * C) +{ + CBLAS_TRANSPOSE tA; + tA = TranA ? CblasTrans : CblasNoTrans; + cblas_sgemv(CblasRowMajor, tA, m, n, alpha, A, n, B, 1, beta, C, 1); +} + +void cpu_axpy(const float * A, const int n, const float alpha, float * B) +{ + cblas_saxpy(n, alpha, A, 1, B, 1); +} + +float cpu_dot(const float * A, const float * B, const int n) +{ + float sum = 0; + for(int i = 0 ; i < n ; i++) + sum += A[i]*B[i]; + return sum; +} + + +} // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b99de6c8/src/blob/math_blob.cc ---------------------------------------------------------------------- diff --git a/src/blob/math_blob.cc b/src/blob/math_blob.cc new file mode 100644 index 0000000..ff81667 --- /dev/null +++ b/src/blob/math_blob.cc @@ -0,0 +1,193 @@ +#include "singa/blob/math_blob.h" + +namespace singa { + +/**********************************************************************************/ +// shape_check function + +int get_size(const std::vector<int>& shape) +{ + int sum = 1; + for(unsigned int i = 0; i < shape.size(); i++) sum *= shape[i]; + return sum; +} + +/**********************************************************************************/ +// class1 matrix operation + + +void GEMM(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C, float alpha, float beta) +{ + if(xpu == cpu) + { + if(check_shape_mmm(A, B, *C)) + { + int m = C->shape().at(0); + int n = C->shape().at(1); + int k = A.isTranspose() ? A.shape().at(0) : A.shape().at(1); + bool TranA = A.isTranspose(); + bool TranB = B.isTranspose(); + cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, k, alpha, beta, TranA, TranB, C->mutable_cpu_data()); + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } +} +//C = alpha*A*B+beta*C, A, B and C are matrix + + +void MMDot(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C) +{ + GEMM(xpu, A, B, C, 1, 0); +} +// A,B and C are matrix + + +void MVDot(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C) +{ + if(xpu == cpu) + { + if(check_shape_mvv(A, B, *C)) + { + int m = B.shape().at(0); + int n = C->shape().at(0); + bool TranA = A.isTranspose(); + cpu_gemv(A.cpu_data(), B.cpu_data(), m, n, 1, 0, TranA, C->mutable_cpu_data()); + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } + +} +// A is matrix,B and C are vector + + +void VVDot(XPU xpu, const Blob<float> & A, const Blob<float> & B, Blob<float> * C) +{ + if(xpu == cpu) + { + if(check_shape_vvm(A, B, *C)) + { + int m = C->shape().at(0); + int n = C->shape().at(1); + cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, 1, 0, false, false, C->mutable_cpu_data()); + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } +} +// C is matrix,A and B are vector + + +float VVdot(XPU xpu, const Blob<float> & A, const Blob<float> & B) +{ + float res = 0; + if(xpu == cpu) + { + if(check_shape_equal(A, B, B)) + { + int n = get_size(A.shape()); + res = cpu_dot(A.cpu_data(), B.cpu_data(), n); + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } + return res; +} +//A and B are vectors + +void AXPY(XPU xpu, const Blob<float> & A, Blob<float> * B, float alpha) +{ + if(xpu == cpu) + { + if(check_shape_equal(A, *B, *B)) + { + cpu_axpy(A.cpu_data(), get_size(A.shape()), alpha, B->mutable_cpu_data()); + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } +} +// element-wise operation: Bi = alpha*Ai+Bi A and B should have the same size + +inline void Repmat(XPU xpu, const Blob<float> & A, Blob<float> * B) +{ + MVAdd(xpu, A, B, 1, 0); +} +// A is a vector, B is a matrix , let each row of B to be A + +void MVAdd(XPU xpu, const Blob<float> & A, Blob<float> * B, float alpha, float beta) +{ + if(xpu == cpu) + { + if(check_shape_mv(*B, A)) + { + int m = get_size(A.shape()); + int n = get_size(B->shape()) / m; + const float * univ = cpu_uni_vec(n); + cpu_gemm(A.cpu_data(), univ, m, n, 1, alpha, beta, false, false, B->mutable_cpu_data()); + delete univ; + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } +} +// A is a vector, B is a matrix , Bij = alpha*Ai+beta*Bij +// will use gemm. faster than general expand_f + +void MVSum(XPU xpu, const Blob<float> & A, Blob<float> * B, float alpha, float beta) +{ + if(xpu == cpu) + { + if(check_shape_mv(A, *B)) + { + int m = get_size(B->shape()); + int n = get_size(A.shape()) / m; + const float * univ = cpu_uni_vec(n); + cpu_gemm(A.cpu_data(), univ, m, 1, n, alpha, beta, false, false, B->mutable_cpu_data()); + delete univ; + } + else{ + // report errors here + } + } + if(xpu == gpu) + { + //gpu part + } +} +// B is a vector, A is a matrix , Bi = \sigma_j_{alpha*Aij}+beta*Bi +// will use gemm. faster than general reduce_f + +} // namespace singa + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b99de6c8/src/blob/test.cc ---------------------------------------------------------------------- diff --git a/src/blob/test.cc b/src/blob/test.cc new file mode 100644 index 0000000..d13ed5e --- /dev/null +++ b/src/blob/test.cc @@ -0,0 +1,165 @@ +#include <iostream> + +#include "singa_op.h" +#include "math_addr.h" + +using namespace std; + +void test_gemm1() +{ + float A[3][2] = {}; + float B[3][2] = {}; + float C[2][2] = {}; + for(int i = 0; i < 3; i++) + for(int j = 0; j < 2; j++) + { + A[i][j] = i+j; + B[i][j] = i+j - i*j; + } + cpu_gemm(A[0], B[0], 2, 2, 3 , 1, 0, true, false, C[0]); + float D[2][2] = {}; + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + { + D[i][j] = 0; + for(int k = 0; k < 3; k++) + D[i][j] += A[k][i]*B[k][j]; + } + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + { + cout<<C[i][j] - D[i][j]<<endl; + } +} + + +void test_gemm2() +{ + float A[2][3] = {}; + float B[3][2] = {}; + float C[2][2] = {}; + for(int i = 0; i < 3; i++) + for(int j = 0; j < 2; j++) + { + A[j][i] = i-j; + B[i][j] = i+j + i*j; + } + cpu_gemm(A[0], B[0], 2, 2, 3 , 1, 0, false, false, C[0]); + float D[2][2] = {}; + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + { + D[i][j] = 0; + for(int k = 0; k < 3; k++) + D[i][j] += A[i][k]*B[k][j]; + } + for(int i = 0; i < 2; i++) + for(int j = 0; j < 2; j++) + { + cout<<C[i][j] - D[i][j]<<endl; + } +} + + +void test_gemv() +{ + float A[4][3] = {}; + float B[4]= {}; + float C[3] = {}; + float D[3] = {}; + for(int i = 0; i < 4; i++) + { + for(int j = 0; j < 3; j++) + { + A[j][i] = i-j + i*j; + } + } + for(int i = 0; i < 4; i++)B[i] = i; + for(int i = 0; i < 3; i++)C[i] = 10; + cpu_gemv(A[0], B, 4, 3, 1, 1, true, C); + for(int i = 0; i < 3; i++) + for(int j = 0; j < 4; j++) + { + D[i] += A[j][i]*B[j]; + } + for(int i = 0; i < 3; i++)cout<<C[i] - D[i] - 10<<endl; +} + +void test_axpy() +{ + float A[4][3] = {}; + float C[4][3] = {}; + float B[3][4] = {}; + float D[3][4] = {}; + for(int i = 0; i < 4; i++) + { + for(int j = 0; j < 3; j++) + { + A[i][j] = i-j + i*j; + B[j][i] = i-j + i*j; + C[i][j] = A[i][j]; + D[j][i] = B[j][i]; + } + } + cpu_axpy(A[0], 12, 2, B[0]); + for(int i = 0; i < 12; i++)D[0][i] += 2*C[0][i]; + for(int i = 0; i < 3; i++) + { + for(int j = 0; j < 4; j++) + { + cout<<B[i][j] - D[i][j]<<endl; + } + } +} + +void test_eop() +{ + float A[10] = {}; + float B[10] = {}; + float C[10] = {}; + float D[10] = {}; + float O[10] = {}; + for(int i = 0; i < 10; i++) + { + A[i] = i; + B[i] = -i; + C[i] = i; + } + cpu_e_f<op::Set>(5, 15, O); + for(int i = 0; i < 5; i++)cout<<O[i] - 15<<endl; + for(int i = 5; i < 10; i++)cout<<O[i]<<endl; + cpu_e_f<op::Scale>(10, C, 2, C); + for(int i = 0; i < 10; i++)cout<<C[i] - 2* i<<endl; + cpu_e_f<op::Add>(10, A, B, 0, 0, O); + for(int i = 0; i < 10; i++)cout<<O[i]<<endl; +} + +void test_exrd() +{ + float A[3][10] = {}; + float B[3] = {}; + for(int i = 0; i < 3; i++) + for(int j = 0; j < 10; j++) + { + A[i][j] = (i + 1)*j; + } + cpu_reduce_f<op::Sum>(A[0], 3, 10, B); + for(int i = 0; i < 3; i++) B[i] -= 45*(i+1); + for(int i = 0; i < 3; i++)cout<<B[i]<<endl; + cpu_expand_f<op::Repmat>(B, 3, 10, A[0]); + cpu_reduce_f<op::Sum>(A[0], 3, 10, B); + for(int i = 0; i < 3; i++)cout<<B[i]<<endl; +} + +int main() +{ + test_gemm1() ; + test_gemm2(); + test_gemv(); + test_axpy(); + test_eop(); + test_exrd(); + return 0; +} + +