Repository: incubator-singa
Updated Branches:
  refs/heads/master 369d87960 -> 8329aa0c3
SINGA-80 New Blob Level and Address Level Math Operation Interface
-------
Add gtest for blob-level functions and fix bugs in math_blob.h.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/247002d3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/247002d3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/247002d3

Branch: refs/heads/master
Commit: 247002d3027b5ac45dda735f62cc2f8df7c2dcac
Parents: 0233049
Author: jinyangturbo <[email protected]>
Authored: Wed Mar 16 00:16:01 2016 -0700
Committer: Wei Wang <[email protected]>
Committed: Sat Apr 2 21:33:27 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/blob.h      |  15 +
 include/singa/utils/math_blob.h |  70 +++--
 src/test/test_math.cc           | 587 +++++++++++++++++++++++++++++++++++
 3 files changed, 649 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/247002d3/include/singa/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h
index 3351cff..9defeac 100644
--- a/include/singa/utils/blob.h
+++ b/include/singa/utils/blob.h
@@ -276,6 +276,21 @@ class Blob {
     ret.transpose_ = !transpose_;
     return ret;
   }
+  // Check whether two blobs have exactly the same content.
+  bool check_equal(Blob* other) const {
+    if (transpose() != other->transpose()) return false;
+    if (count() != other->count()) return false;
+    if (shape().size() != other->shape().size()) return false;
+    for (int i = 0; i < shape().size(); i++) {
+      if (shape(i) != other->shape(i)) return false;
+    }
+    const Dtype* a = cpu_data();
+    const Dtype* b = other->cpu_data();
+    for (int i = 0; i < count(); i++) {
+      if (a[i] != b[i]) return false;
+    }
+    return true;
+  }

 protected:
   std::shared_ptr<SyncedMemory> data_ = nullptr;
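
As a quick orientation for reviewers, a minimal sketch of how the new
check_equal helper is called (the values are illustrative, not from the
patch):

  #include "singa/utils/blob.h"
  using singa::Blob;

  Blob<float> a(10), b(10);
  a.SetValue(2.0f);
  b.SetValue(2.0f);
  // true only if the transpose flag, shape, and every element match exactly
  bool same = a.check_equal(&b);

Note that check_equal compares elements with operator==, so it is only
suitable for results expected to be bit-identical, as in the CPU tests below.
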
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/247002d3/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 55ba44b..35985f1 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -32,14 +32,16 @@
 #include "singa/utils/context.h"

 namespace singa {
+
+#define NO_GPU LOG(FATAL) << "Not compiled with GPU";
 /**
 * \file math_blob.h is not tested thoroughly.
 * Only GEMM(), MMDot(), MVSumRow() and MVAddRow() are used now.
 */
 /************* BLAS level 1 *****************/
 /**
- * Scale each element of A with alpha, and put the result into B.
- * Bi = alpha*Ai
+ * Scale each element of A with alpha, and put the result into A.
+ * Ai = alpha*Ai
 * Use blas scale internally.
 */
 template<typename Dtype>
@@ -52,6 +54,8 @@ void Scale(Dtype alpha, Blob<Dtype> * B) {
 #ifdef USE_GPU
     gpu_scale(context->cublas_handle(device), B->count(), alpha,
               B->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif
   }
 }
@@ -70,7 +74,9 @@ void AXPY(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
 #ifdef USE_GPU
     gpu_axpy(context->cublas_handle(device), A.count(), alpha, A.gpu_data(),
              B->mutable_gpu_data());
-#endif  // USE_GPU
+#else
+  NO_GPU;
+#endif
   }
 }
@@ -111,6 +117,8 @@ void GEMV(Dtype alpha, Dtype beta, const Blob<Dtype>& A,
 #ifdef USE_GPU
     gpu_gemv(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), m, n,
              alpha, beta, TranA, C->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -176,6 +184,8 @@ void GEMM(Dtype alpha, Dtype beta, const Blob<Dtype>& A, const Blob<Dtype>& B,
 #ifdef USE_GPU
     gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), m, n,
              k, alpha, beta, TranA, TranB, C->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
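
For context, blob-level GEMM computes C = alpha*A*B + beta*C and dispatches
to cpu_gemm or gpu_gemm depending on the device the calling thread is bound
to (CPU when Context reports device < 0). A hedged usage sketch, with
illustrative shapes and scalars:

  Blob<float> A(5, 5), B(5, 5), C(5, 5);
  // ... fill A, B and C ...
  // C = 3*A*B + 2*C; transposed inputs are handled through the blobs'
  // transpose flags, as exercised by TestGEMM further down.
  GEMM<float>(3.0f, 2.0f, A, B, &C);
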
@@ -216,9 +226,10 @@ Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) {
     res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
   } else {
 #ifdef USE_GPU
-    // gpu part
     res = gpu_dot(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
                   n);
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
   return res;
@@ -242,12 +253,14 @@ void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) {
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
   if (device < 0) {
-    cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, 1, 0, false, false,
-             C->mutable_cpu_data());
+    cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, Dtype(1), Dtype(0), false,
+             false, C->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
     gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), m, n,
              1, 1, 0, false, false, C->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -268,7 +281,7 @@ void Map(const Blob<Dtype> & A, Blob<Dtype> * B) {
 #ifdef USE_GPU
     gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data());
 #else
-    LOG(ERROR) << "Not implemented";
+    NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -310,7 +323,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
 #ifdef USE_GPU
     gpu_e_f<Op>(A.count(), A.gpu_data(), alpha, B->mutable_gpu_data());
 #else
-    LOG(FATAL) << "Not implemented";
+    NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -328,9 +341,8 @@ void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>& B,
     cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->cpu_data(),
                 C->mutable_cpu_data());
   } else {
-#ifdef USE_GPU
-    LOG(ERROR) << "Not implemented";
-#endif  // USE_GPU
+    // TODO(wangwei) implement gpu version.
+    NO_GPU;
   }
 }
@@ -353,7 +365,7 @@ void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) {
     CUDA_CHECK(cudaMemcpy(static_cast<Dtype*>(B->mutable_gpu_data()),
                A.gpu_data(), sizeof(Dtype) * A.count(), cudaMemcpyDefault));
 #else
-    LOG(FATAL) << "Not implemented";
+    NO_GPU;
 #endif
   }
 }
@@ -365,7 +377,7 @@ void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) {
 */
 template<typename Dtype>
 void Add(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
-  Map<singa::op::Add<Dtype>>(alpha, A, B);
+  Map<singa::op::Add<Dtype>, Dtype>(alpha, A, B);
 }
@@ -385,7 +397,7 @@ void Add(const Blob<Dtype> & A, const Blob<Dtype> & B,
 */
 template<typename Dtype>
 void Sub(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype>* B) {
-  Map<singa::op::Sub<Dtype>>(alpha, A, B);
+  Map<singa::op::Sub<Dtype>, Dtype>(alpha, A, B);
 }
@@ -406,7 +418,7 @@ void Sub(const Blob<Dtype> & A, const Blob<Dtype> & B,
 template<typename Dtype>
 void Mult(const Blob<Dtype> & A, const Blob<Dtype> & B,
     Blob<Dtype> * C) {
-  Map<singa::op::Mult<Dtype>>(A, B, C);
+  Map<singa::op::Mult<Dtype>, Dtype>(A, B, C);
   // TODO(wangwei) use MKL's vector func
 }
@@ -417,7 +429,7 @@ void Mult(const Blob<Dtype> & A, const Blob<Dtype> & B,
 template<typename Dtype>
 void Div(const Blob<Dtype> & A, const Blob<Dtype> & B,
     Blob<Dtype> * C) {
-  Map<singa::op::Div<Dtype>>(A, B, C);
+  Map<singa::op::Div<Dtype>, Dtype>(A, B, C);
   // TODO(wangwei) use MKL's vector func
 }
@@ -481,6 +493,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
 #ifdef USE_GPU
     singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
                           m, n, n);
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
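
The Map call sites above now name Dtype explicitly. With only the Op
argument supplied, Dtype must be deduced consistently from every argument,
and a scalar whose type differs from the blob's element type makes deduction
ambiguous. A small illustration of one way deduction can go wrong (assuming
the Map overloads shown in this file):

  Blob<float> A(10), B(10);
  // Map<singa::op::Add<float>>(2, A, &B);       // alpha deduces as int,
  //                                             // blobs deduce float: error
  Map<singa::op::Add<float>, float>(2, A, &B);   // explicit, as in the patch
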
@@ -520,6 +534,8 @@ void MVAddRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
 #ifdef USE_GPU
     singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
                           m, n, n);
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -574,7 +590,8 @@ void MVSumCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
   } else {
 #ifdef USE_GPU
     singa_gpu_sum_col(A.gpu_data(), B->mutable_gpu_data(), m, n, n);
-    // gpu part (TODO check transpose case)
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -599,7 +616,8 @@ void MVSumRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
   } else {
 #ifdef USE_GPU
     singa_gpu_sum_row(A.gpu_data(), B->mutable_gpu_data(), m, n, n);
-    // gpu part (TODO check transpose case)
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -619,8 +637,9 @@ void Reduce2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
     cpu_reduce_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
-    // gpu part
     gpu_reduce_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -640,6 +659,8 @@ void Expand2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
   } else {
 #ifdef USE_GPU
     gpu_expand_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif  // USE_GPU
   }
 }
@@ -659,6 +680,8 @@ Dtype Asum(const Blob<Dtype>& A) {
 #ifdef USE_GPU
     ret = gpu_asum(context->cublas_handle(device), A.count(), A.gpu_data(), 1)
         / A.count();
+#else
+  NO_GPU;
 #endif
   }
   return ret;
@@ -679,7 +702,7 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
     gpu_sample_uniform(context->curand_generator(thread), A->count(), low,
                        high, A->mutable_gpu_data());
 #else
-    LOG(FATAL) << "Not implemented";
+    NO_GPU;
 #endif
   }
 }
@@ -696,6 +719,8 @@ void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) {
 #ifdef USE_GPU
     gpu_sample_gaussian(context->curand_generator(thread), A->count(), mean,
                         std, A->mutable_gpu_data());
+#else
+  NO_GPU;
 #endif
   }
 }
@@ -712,8 +737,7 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
     cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
                 B->mutable_cpu_data());
   } else {
-#ifdef USE_GPU
-#endif  // USE_GPU
+    NO_GPU;
   }
 }
@@ -727,7 +751,7 @@ void Zero(Blob<Dtype>* B) {
 #ifdef USE_GPU
     cudaMemset(B->mutable_gpu_data(), 0, B->count() * sizeof(float));
 #else
-    LOG(FATAL) << "Not implemented";
+    NO_GPU;
 #endif  // USE_GPU
   }
 }
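
With this change, every GPU-only branch fails fast in a CPU-only build
instead of silently doing nothing or logging at ERROR level. When USE_GPU is
undefined, each guarded branch now reduces to (expansion shown for
illustration only):

  } else {
    // expansion of NO_GPU
    LOG(FATAL) << "Not compiled with GPU";
  }

Note the macro body already ends with a semicolon, so writing "NO_GPU;"
produces a redundant trailing semicolon; this is harmless at statement scope.
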
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/247002d3/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 2627b2e..6bb6001 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -20,6 +20,8 @@
  *************************************************************/
 #include <thread>
 #include "gtest/gtest.h"
+#include "singa/utils/blob.h"
+#include "singa/utils/math_blob.h"
 #include "singa/utils/math_addr.h"
 #include "singa/utils/math_kernel.h"
 #include "singa/utils/singa_op.h"
@@ -34,6 +36,591 @@
 using namespace singa;
 using namespace std;

+TEST(MathBlobTest, TestScale) {
+  Blob<float>* A = new Blob<float>(10);
+  Blob<float>* B = new Blob<float>(10);
+  A->SetValue(2);
+  B->SetValue(6);
+  Scale<float>(3.0, A);
+  ASSERT_EQ(A->check_equal(B), true);
+}
+
+TEST(MathBlobTest, TestAXPY) {
+  Blob<float>* A = new Blob<float>(10);
+  Blob<float>* B = new Blob<float>(10);
+  Blob<float>* C = new Blob<float>(10);
+  Blob<float>* D = new Blob<float>(10);
+  A->SetValue(2);
+  B->SetValue(3);
+  C->SetValue(7);
+  D->SetValue(2);
+  AXPY<float>(2.0, *A, B);
+  ASSERT_EQ(B->check_equal(C), true);
+  ASSERT_EQ(A->check_equal(D), true);
+}
+
+TEST(MathBlobTest, TestGEMV) {
+  float A[5][5] = {};
+  float AT[5][5] = {};
+  float B[5] = {};
+  float Res[5] = {};
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      A[i][j] = i * j + i - j;
+      AT[j][i] = i * j + i - j;
+    }
+    B[i] = 5 * i + 3;
+    Res[i] = i;
+  }
+
+  Blob<float>* BlobA = new Blob<float>(5, 5);
+  Blob<float>* BlobAT = new Blob<float>(5, 5);
+  Blob<float>* BlobB = new Blob<float>(5);
+  Blob<float>* BlobAB = new Blob<float>(5);
+  Blob<float>* BlobATB = new Blob<float>(5);
+  Blob<float>* BlobRes = new Blob<float>(5);
+
+  BlobA->set_cpu_data(A[0]);
+  BlobAT->set_cpu_data(AT[0]);
+  BlobAT->set_transpose(true);
+  BlobB->set_cpu_data(B);
+  BlobAB->set_cpu_data(Res);
+  BlobATB->set_cpu_data(Res);
+
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      Res[i] += 2 * A[i][j] * B[j];
+    }
+  }
+
+  BlobRes->set_cpu_data(Res);
+
+  GEMV<float>(2, 1, *BlobA, *BlobB, BlobAB);
+  GEMV<float>(2, 1, *BlobAT, *BlobB, BlobATB);
+
+  ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMVDot) {
+  float A[5][5] = {};
+  float AT[5][5] = {};
+  float B[5] = {};
+  float Res[5] = {};
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      A[i][j] = i * j + i - j;
+      AT[j][i] = i * j + i - j;
+    }
+    B[i] = 5 * i - 2;
+    Res[i] = 0;
+  }
+
+  Blob<float>* BlobA = new Blob<float>(5, 5);
+  Blob<float>* BlobAT = new Blob<float>(5, 5);
+  Blob<float>* BlobB = new Blob<float>(5);
+  Blob<float>* BlobAB = new Blob<float>(5);
+  Blob<float>* BlobATB = new Blob<float>(5);
+  Blob<float>* BlobRes = new Blob<float>(5);
+
+  BlobA->set_cpu_data(A[0]);
+  BlobAT->set_cpu_data(AT[0]);
+  BlobAT->set_transpose(true);
+  BlobB->set_cpu_data(B);
+  BlobAB->set_cpu_data(Res);
+  BlobATB->set_cpu_data(Res);
+
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      Res[i] += A[i][j] * B[j];
+    }
+  }
+
+  BlobRes->set_cpu_data(Res);
+
+  MVDot<float>(*BlobA, *BlobB, BlobAB);
+  MVDot<float>(*BlobAT, *BlobB, BlobATB);
+
+  const float* addrRes = BlobAB->cpu_data();
+  for (int i = 0; i < 5; i++) {
+    ASSERT_EQ(addrRes[i], Res[i]);
+  }
+  ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+}
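
These tests rely on check_equal, i.e. exact float equality, which is fine
for the CPU reference paths exercised here. If the same tests are later
pointed at the GPU kernels, a tolerance-based comparison may be needed; a
sketch of such a check (EXPECT_NEAR is standard gtest; the blob names match
TestMVDot above):

  const float* got = BlobAB->cpu_data();
  const float* want = BlobRes->cpu_data();
  for (int i = 0; i < BlobAB->count(); i++)
    EXPECT_NEAR(got[i], want[i], 1e-5);
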
+
+TEST(MathBlobTest, TestGEMM) {
+  float A[5][5] = {};
+  float AT[5][5] = {};
+  float B[5][5] = {};
+  float BT[5][5] = {};
+  float Res[5][5] = {};
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      A[i][j] = i * j + i - j;
+      AT[j][i] = i * j + i - j;
+      B[i][j] = -i * j + i * i - j * j;
+      BT[j][i] = -i * j + i * i - j * j;
+      Res[i][j] = i * j + i * i + j * j;
+    }
+  }
+
+  Blob<float>* BlobA = new Blob<float>(5, 5);
+  BlobA->set_cpu_data(A[0]);
+  Blob<float>* BlobAT = new Blob<float>(5, 5);
+  BlobAT->set_cpu_data(AT[0]);
+  BlobAT->set_transpose(true);
+  Blob<float>* BlobB = new Blob<float>(5, 5);
+  BlobB->set_cpu_data(B[0]);
+  Blob<float>* BlobBT = new Blob<float>(5, 5);
+  BlobBT->set_cpu_data(BT[0]);
+  BlobBT->set_transpose(true);
+  Blob<float>* BlobAB = new Blob<float>(5, 5);
+  BlobAB->set_cpu_data(Res[0]);
+  Blob<float>* BlobABT = new Blob<float>(5, 5);
+  BlobABT->set_cpu_data(Res[0]);
+  Blob<float>* BlobATB = new Blob<float>(5, 5);
+  BlobATB->set_cpu_data(Res[0]);
+  Blob<float>* BlobATBT = new Blob<float>(5, 5);
+  BlobATBT->set_cpu_data(Res[0]);
+
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      Res[i][j] *= 2;
+      for (int k = 0; k < 5; k++) {
+        Res[i][j] += 3 * A[i][k] * B[k][j];
+      }
+    }
+  }
+
+  Blob<float>* BlobRes = new Blob<float>(5, 5);
+  BlobRes->set_cpu_data(Res[0]);
+
+  GEMM<float>(3, 2, *BlobA, *BlobB, BlobAB);
+  GEMM<float>(3, 2, *BlobA, *BlobBT, BlobABT);
+  GEMM<float>(3, 2, *BlobAT, *BlobB, BlobATB);
+  GEMM<float>(3, 2, *BlobAT, *BlobBT, BlobATBT);
+
+  ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobABT->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobATBT->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMMDot) {
+  float A[5][5] = {};
+  float AT[5][5] = {};
+  float B[5][5] = {};
+  float BT[5][5] = {};
+  float Res[5][5] = {};
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      A[i][j] = i * j + i - j;
+      AT[j][i] = i * j + i - j;
+      B[i][j] = -i * j + i * i - j * j;
+      BT[j][i] = -i * j + i * i - j * j;
+      Res[i][j] = i * j + i * i + j * j;
+    }
+  }
+
+  Blob<float>* BlobA = new Blob<float>(5, 5);
+  BlobA->set_cpu_data(A[0]);
+  Blob<float>* BlobAT = new Blob<float>(5, 5);
+  BlobAT->set_cpu_data(AT[0]);
+  BlobAT->set_transpose(true);
+  Blob<float>* BlobB = new Blob<float>(5, 5);
+  BlobB->set_cpu_data(B[0]);
+  Blob<float>* BlobBT = new Blob<float>(5, 5);
+  BlobBT->set_cpu_data(BT[0]);
+  BlobBT->set_transpose(true);
+  Blob<float>* BlobAB = new Blob<float>(5, 5);
+  BlobAB->set_cpu_data(Res[0]);
+  Blob<float>* BlobABT = new Blob<float>(5, 5);
+  BlobABT->set_cpu_data(Res[0]);
+  Blob<float>* BlobATB = new Blob<float>(5, 5);
+  BlobATB->set_cpu_data(Res[0]);
+  Blob<float>* BlobATBT = new Blob<float>(5, 5);
+  BlobATBT->set_cpu_data(Res[0]);
+
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      Res[i][j] = 0;
+      for (int k = 0; k < 5; k++) {
+        Res[i][j] += A[i][k] * B[k][j];
+      }
+    }
+  }
+
+  Blob<float>* BlobRes = new Blob<float>(5, 5);
+  BlobRes->set_cpu_data(Res[0]);
+
+  MMDot<float>(*BlobA, *BlobB, BlobAB);
+  MMDot<float>(*BlobA, *BlobBT, BlobABT);
+  MMDot<float>(*BlobAT, *BlobB, BlobATB);
+  MMDot<float>(*BlobAT, *BlobBT, BlobATBT);
+
+  ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobABT->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobATBT->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestVVDot) {
+  float A[10] = {};
+  float B[10] = {};
+  float prod = 0;
+  for (int i = 0; i < 10; i++) {
+    A[i] = i * i - 5 * (i % 2);
+    B[i] = 2 * i * i - 3 * (i % 4);
+    prod += A[i] * B[i];
+  }
+
+  Blob<float>* BlobA = new Blob<float>(10);
+  BlobA->set_cpu_data(A);
+  Blob<float>* BlobB = new Blob<float>(10);
+  BlobB->set_cpu_data(B);
+  float blobprod = VVDot<float>(*BlobA, *BlobB);
+  ASSERT_EQ(blobprod, prod);
+}
+
+TEST(MathBlobTest, TestOuterProduct) {
+  float A[10] = {};
+  float B[10] = {};
+  float AB[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = i * i - 5 * (i % 2);
+    B[i] = 2 * i * i - 3 * (i % 4);
+  }
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
+      AB[i][j] = A[i] * B[j];
+    }
+  }
+  Blob<float>* BlobA = new Blob<float>(10);
+  BlobA->set_cpu_data(A);
+  Blob<float>* BlobB = new Blob<float>(10);
+  BlobB->set_cpu_data(B);
+  Blob<float>* BlobAB = new Blob<float>(10, 10);
+  // BlobAB->SetValue(3);
+  Blob<float>* BlobRes = new Blob<float>(10, 10);
+  BlobRes->set_cpu_data(AB[0]);
+  OuterProduct<float>(*BlobA, *BlobB, BlobAB);
+
+  ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+}
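
A reviewer note: the fixtures allocate every Blob with new and never free
it, which gtest tolerates but which leaks across test cases. Should cleanup
be wanted later, a scoped variant is straightforward (sketch only, not part
of this patch):

  #include <memory>
  std::unique_ptr<Blob<float>> A(new Blob<float>(10));
  A->SetValue(2);
  Scale<float>(3.0f, A.get());  // APIs taking Blob* receive the raw pointer
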
+
+TEST(MathBlobTest, TestMapAB) {
+  float A[10] = {};
+  float Res[10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = i * i - 5 * (i % 2);
+    Res[i] = A[i] * A[i];
+  }
+  Blob<float>* BlobA = new Blob<float>(10);
+  BlobA->set_cpu_data(A);
+  Blob<float>* BlobB = new Blob<float>(10);
+  Blob<float>* BlobRes = new Blob<float>(10);
+  BlobRes->set_cpu_data(Res);
+  Map<singa::op::Square<float>, float>(*BlobA, BlobB);
+  ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMapABC) {
+  float A[10] = {};
+  float B[10] = {};
+  float Res[10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = i * i - 5 * (i % 2);
+    B[i] = 2 * i * i - 3 * (i % 4);
+    Res[i] = A[i] * B[i];
+  }
+  Blob<float>* BlobA = new Blob<float>(10);
+  BlobA->set_cpu_data(A);
+  Blob<float>* BlobB = new Blob<float>(10);
+  BlobB->set_cpu_data(B);
+  Blob<float>* BlobC = new Blob<float>(10);
+  Blob<float>* BlobRes = new Blob<float>(10);
+  BlobRes->set_cpu_data(Res);
+  Map<singa::op::Mult<float>, float>(*BlobA, *BlobB, BlobC);
+  ASSERT_EQ(BlobC->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestCopy) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10);
+  float A[10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = i * i - 5 * (i % 2);
+  }
+  BlobA->set_cpu_data(A);
+  Copy<float>(*BlobA, BlobB);
+  ASSERT_EQ(BlobA->check_equal(BlobB), true);
+}
+
+TEST(MathBlobTest, TestAdd) {
+  Blob<float>* A = new Blob<float>(10);
+  Blob<float>* B = new Blob<float>(10);
+  Blob<float>* C = new Blob<float>(10);
+  Blob<float>* D = new Blob<float>(10);
+  A->SetValue(5);
+  B->SetValue(6);
+  D->SetValue(11);
+  Add<float>(*A, *B, C);
+  ASSERT_EQ(C->check_equal(D), true);
+}
+
+TEST(MathBlobTest, TestSub) {
+  Blob<float>* A = new Blob<float>(10);
+  Blob<float>* B = new Blob<float>(10);
+  Blob<float>* C = new Blob<float>(10);
+  Blob<float>* D = new Blob<float>(10);
+  A->SetValue(5);
+  B->SetValue(6);
+  D->SetValue(-1);
+  Sub<float>(*A, *B, C);
+  ASSERT_EQ(C->check_equal(D), true);
+}
+
+TEST(MathBlobTest, TestMVAddCol) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10, 10);
+  Blob<float>* BlobBT = new Blob<float>(10, 10);
+  Blob<float>* BlobRes = new Blob<float>(10, 10);
+  Blob<float>* BlobResT = new Blob<float>(10, 10);
+
+  float A[10] = {};
+  float B[10][10] = {};
+  float BT[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = 5 * i - 2;
+    for (int j = 0; j < 10; j++) {
+      B[i][j] = i * j + i - j;
+      BT[j][i] = i * j + i - j;
+    }
+  }
+
+  BlobA->set_cpu_data(A);
+  BlobB->set_cpu_data(B[0]);
+  BlobBT->set_cpu_data(BT[0]);
+  BlobBT->set_transpose(true);
+
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
+      B[i][j] = 2.0 * A[i] + 3.0 * B[i][j];
+      BT[j][i] = 2.0 * A[i] + 3.0 * BT[j][i];
+    }
+  }
+
+  BlobRes->set_cpu_data(B[0]);
+  BlobResT->set_cpu_data(BT[0]);
+  BlobResT->set_transpose(true);
+
+  MVAddCol<float>(2.0, 3.0, *BlobA, BlobB);
+  MVAddCol<float>(2.0, 3.0, *BlobA, BlobBT);
+
+  ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestMVAddRow) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10, 10);
+  Blob<float>* BlobBT = new Blob<float>(10, 10);
+  Blob<float>* BlobRes = new Blob<float>(10, 10);
+  Blob<float>* BlobResT = new Blob<float>(10, 10);
+
+  float A[10] = {};
+  float B[10][10] = {};
+  float BT[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = 5 * i - 2;
+    for (int j = 0; j < 10; j++) {
+      B[i][j] = i * j + i - j;
+      BT[j][i] = i * j + i - j;
+    }
+  }
+
+  BlobA->set_cpu_data(A);
+  BlobB->set_cpu_data(B[0]);
+  BlobBT->set_cpu_data(BT[0]);
+  BlobBT->set_transpose(true);
+
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
+      B[j][i] = 2.0 * A[i] + 3.0 * B[j][i];
+      BT[i][j] = 2.0 * A[i] + 3.0 * BT[i][j];
+    }
+  }
+
+  BlobRes->set_cpu_data(B[0]);
+  BlobResT->set_cpu_data(BT[0]);
+  BlobResT->set_transpose(true);
+
+  MVAddRow<float>(2.0, 3.0, *BlobA, BlobB);
+  MVAddRow<float>(2.0, 3.0, *BlobA, BlobBT);
+
+  ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestRepmatCol) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10, 10);
+  Blob<float>* BlobBT = new Blob<float>(10, 10);
+  Blob<float>* BlobRes = new Blob<float>(10, 10);
+  Blob<float>* BlobResT = new Blob<float>(10, 10);
+
+  float A[10] = {};
+  float B[10][10] = {};
+  float BT[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = 5 * i - 2;
+    for (int j = 0; j < 10; j++) {
+      B[i][j] = A[i];
+      BT[j][i] = A[i];
+    }
+  }
+
+  BlobA->set_cpu_data(A);
+  BlobBT->set_transpose(true);
+
+  BlobRes->set_cpu_data(B[0]);
+  BlobResT->set_cpu_data(BT[0]);
+  BlobResT->set_transpose(true);
+
+  RepmatCol<float>(*BlobA, BlobB);
+  RepmatCol<float>(*BlobA, BlobBT);
+
+  ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestRepmatRow) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10, 10);
+  Blob<float>* BlobBT = new Blob<float>(10, 10);
+  Blob<float>* BlobRes = new Blob<float>(10, 10);
+  Blob<float>* BlobResT = new Blob<float>(10, 10);
+
+  float A[10] = {};
+  float B[10][10] = {};
+  float BT[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = 5 * i - 2;
+    for (int j = 0; j < 10; j++) {
+      B[j][i] = A[i];
+      BT[i][j] = A[i];
+    }
+  }
+
+  BlobA->set_cpu_data(A);
+  BlobBT->set_transpose(true);
+
+  BlobRes->set_cpu_data(B[0]);
+  BlobResT->set_cpu_data(BT[0]);
+  BlobResT->set_transpose(true);
+
+  RepmatRow<float>(*BlobA, BlobB);
+  RepmatRow<float>(*BlobA, BlobBT);
+
+  ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestMVSumCol) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobACopy = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10, 10);
+  Blob<float>* BlobBT = new Blob<float>(10, 10);
+  Blob<float>* BlobRes = new Blob<float>(10);
+
+  float A[10] = {};
+  float B[10][10] = {};
+  float BT[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = 5 * i - 2;
+    for (int j = 0; j < 10; j++) {
+      B[i][j] = i * j + i - j;
+      BT[j][i] = i * j + i - j;
+    }
+  }
+
+  BlobA->set_cpu_data(A);
+  BlobACopy->set_cpu_data(A);
+  BlobB->set_cpu_data(B[0]);
+  BlobBT->set_cpu_data(BT[0]);
+  BlobBT->set_transpose(true);
+
+  for (int i = 0; i < 10; i++) {
+    A[i] *= 2.0;
+    for (int j = 0; j < 10; j++) {
+      A[i] += 3.0 * B[i][j];
+    }
+  }
+  BlobRes->set_cpu_data(A);
+
+  MVSumCol<float>(2.0, 3.0, *BlobB, BlobA);
+  MVSumCol<float>(2.0, 3.0, *BlobBT, BlobACopy);
+
+  ASSERT_EQ(BlobA->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobACopy->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMVSumRow) {
+  Blob<float>* BlobA = new Blob<float>(10);
+  Blob<float>* BlobACopy = new Blob<float>(10);
+  Blob<float>* BlobB = new Blob<float>(10, 10);
+  Blob<float>* BlobBT = new Blob<float>(10, 10);
+  Blob<float>* BlobRes = new Blob<float>(10);
+
+  float A[10] = {};
+  float B[10][10] = {};
+  float BT[10][10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = 5 * i - 2;
+    for (int j = 0; j < 10; j++) {
+      B[j][i] = i * j + i - j;
+      BT[i][j] = i * j + i - j;
+    }
+  }
+
+  BlobA->set_cpu_data(A);
+  BlobACopy->set_cpu_data(A);
+  BlobB->set_cpu_data(B[0]);
+  BlobBT->set_cpu_data(BT[0]);
+  BlobBT->set_transpose(true);
+
+  for (int i = 0; i < 10; i++) {
+    A[i] *= 2.0;
+    for (int j = 0; j < 10; j++) {
+      A[i] += 3.0 * B[j][i];
+    }
+  }
+  BlobRes->set_cpu_data(A);
+
+  MVSumRow<float>(2.0, 3.0, *BlobB, BlobA);
+  MVSumRow<float>(2.0, 3.0, *BlobBT, BlobACopy);
+
+  ASSERT_EQ(BlobA->check_equal(BlobRes), true);
+  ASSERT_EQ(BlobACopy->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestASum) {
+  float A[10] = {};
+  for (int i = 0; i < 10; i++) {
+    A[i] = ((i % 3) - 1) * i;
+  }
+
+  Blob<float>* BlobA = new Blob<float>(10);
+  BlobA->set_cpu_data(A);
+
+  float BlobRes = Asum<float>(*BlobA);
+  float res = cblas_sasum(10, A, 1) / 10;
+
+  ASSERT_EQ(BlobRes, res);
+}
+
 TEST(MathTest, TestGemmCPU) {
   float A[3][2] = {};
   float B[3][2] = {};
