http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/src/core/tensor/tensor_math_cpp.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h index 5ce33ad..97da896 100644 --- a/src/core/tensor/tensor_math_cpp.h +++ b/src/core/tensor/tensor_math_cpp.h @@ -29,34 +29,34 @@ /// For Blob argument xxx, name its pointer as xxxPtr. namespace singa { template <> -void Square<float, lang::Cpp>(int count, const Blob* input, - Blob* ret, Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* in = static_cast<const float*>(input->data()); +void Square<float, lang::Cpp>(int count, const Blob *input, Blob *ret, + Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *in = static_cast<const float *>(input->data()); for (int i = 0; i < count; i++) { dptr[i] = in[i] * in[i]; } } template <> -void Add<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, - Blob* ret, Context* ctx) { +void Add<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, + Blob *ret, Context *ctx) { // CHECK_EQ(ctx->stream, nullptr); - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* lptr = static_cast<const float*>(lhs->data()); - const float* rptr = static_cast<const float*>(rhs->data()); + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *lptr = static_cast<const float *>(lhs->data()); + const float *rptr = static_cast<const float *>(rhs->data()); for (int i = 0; i < count; i++) { dptr[i] = lptr[i] + rptr[i]; } } template <> -void Sub<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, - Blob* ret, Context* ctx) { +void Sub<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, + Blob *ret, Context *ctx) { // CHECK_EQ(ctx->stream, nullptr); - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* lptr = static_cast<const float*>(lhs->data()); - const float* rptr = static_cast<const float*>(rhs->data()); + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *lptr = static_cast<const float *>(lhs->data()); + const float *rptr = static_cast<const float *>(rhs->data()); for (int i = 0; i < count; i++) { dptr[i] = lptr[i] - rptr[i]; } @@ -64,10 +64,10 @@ void Sub<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, // sum all elements of input into ret // TODO(wangwei) optimize using omp template <> -void Sum<float, lang::Cpp>(int count, const Blob* input, float* ret, - Context* ctx) { +void Sum<float, lang::Cpp>(int count, const Blob *input, float *ret, + Context *ctx) { float s = 0.f; - const float* in = static_cast<const float*>(input->data()); + const float *in = static_cast<const float *>(input->data()); for (int i = 0; i < count; i++) { s += in[i]; } @@ -76,10 +76,10 @@ void Sum<float, lang::Cpp>(int count, const Blob* input, float* ret, // TODO(wangwei) optimize using omp template <> -void SumRows<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret, - Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* in = static_cast<const float*>(input->data()); +void SumRows<float, lang::Cpp>(int nrow, int ncol, const Blob *input, Blob *ret, + Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *in = static_cast<const float *>(input->data()); memset(dptr, 0, ncol * sizeof(float)); for (int r = 0; r < nrow; r++) { for (int c = 0; c < ncol; c++) { @@ -91,10 +91,10 @@ void SumRows<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret, // Sum the rows of the input matrix into a vector // TODO(wangwei) optimize using omp template <> -void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret, - Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* in = static_cast<const float*>(input->data()); +void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob *input, + Blob *ret, Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *in = static_cast<const float *>(input->data()); memset(dptr, 0, ncol * sizeof(float)); for (int r = 0; r < nrow; r++) { for (int c = 0; c < ncol; c++) { @@ -104,64 +104,127 @@ void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* r } template <> -void EltwiseMult<float, lang::Cpp>(int count, const Blob* input, float x, - Blob* ret, Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* lptr = static_cast<const float*>(input->data()); +void EltwiseMult<float, lang::Cpp>(int count, const Blob *input, float x, + Blob *ret, Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *lptr = static_cast<const float *>(input->data()); for (int i = 0; i < count; i++) { dptr[i] = lptr[i] * x; } } template <> -void EltwiseMult<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, - Blob* ret, Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* lptr = static_cast<const float*>(lhs->data()); - const float* rptr = static_cast<const float*>(rhs->data()); +void EltwiseMult<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, + Blob *ret, Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *lptr = static_cast<const float *>(lhs->data()); + const float *rptr = static_cast<const float *>(rhs->data()); for (int i = 0; i < count; i++) { dptr[i] = lptr[i] * rptr[i]; } } template <> -void Bernoulli<float, lang::Cpp>(int count, float p, Blob* ret, Context* ctx) { +void Bernoulli<float, lang::Cpp>(int count, float p, Blob *ret, Context *ctx) { std::bernoulli_distribution distribution(p); - float* ptr = static_cast<float*>(ret->mutable_data()); + float *ptr = static_cast<float *>(ret->mutable_data()); for (int i = 0; i < count; i++) { ptr[i] = distribution(ctx->random_generator) ? 1.0f : 0.0f; } } template <> -void Uniform<float, lang::Cpp>(int count, float low, float high, Blob* ret, - Context* ctx) { +void Uniform<float, lang::Cpp>(int count, float low, float high, Blob *ret, + Context *ctx) { std::uniform_real_distribution<float> distribution(low, high); - float* ptr = static_cast<float*>(ret->mutable_data()); + float *ptr = static_cast<float *>(ret->mutable_data()); for (int i = 0; i < count; i++) { ptr[i] = static_cast<float>(distribution(ctx->random_generator)); } } template <> -void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob* ret, - Context* ctx) { +void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob *ret, + Context *ctx) { std::normal_distribution<float> distribution(mean, std); - float* ptr = static_cast<float*>(ret->mutable_data()); + float *ptr = static_cast<float *>(ret->mutable_data()); for (int i = 0; i < count; i++) { ptr[i] = static_cast<float>(distribution(ctx->random_generator)); } } +// follow the consistency guide of math API +template <> +void Div<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = alpha / inPtr[i]; + } +} + +template <> +void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow, + const size_t ncol, const Blob *M, const Blob *v, + Blob *out, Context *ctx) { + const float *MPtr = static_cast<const float *>(M->data()); + const float *vPtr = static_cast<const float *>(v->data()); + float *outPtr = static_cast<float *>(out->mutable_data()); + if (side_right) { + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + for (size_t c = 0; c < ncol; c++) { + outPtr[offset + c] = MPtr[offset + c] * vPtr[c]; + } + } + } else { + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + for (size_t c = 0; c < ncol; c++) { + outPtr[offset + c] = MPtr[offset + c] * vPtr[r]; + } + } + } +} + +template <> +void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out, + Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + for (size_t i = 0; i < num; i++) + outPtr[i] = x; +} #ifdef USE_CBLAS template <> -void Dot<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs, - float* ret, Context* ctx) { - float dptr = ret->mutable_data(), lptr = lhs->data(), rptr = rhs->data(); - *ret = cblas_sdot(count, lptr, 1, rptr, 1); +void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, + float *out, Context *ctx) { + const float *in1Ptr = static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + *out = cblas_sdot(num, in1Ptr, 1, in2Ptr, 1); } -#endif +template <> +void GEMM<float, lang::Cpp>(const bool transA, const bool transB, + const size_t nrowA, const size_t ncolB, + const size_t ncolA, const float alpha, + const Blob *A, const Blob *B, const float beta, + Blob *C, Context *ctx) { + auto transa = transA ? CblasTrans : CblasNoTrans; + auto transb = transB ? CblasTrans : CblasNoTrans; + auto lda = transA ? nrowA : ncolA; + auto ldb = transB ? ncolA : ncolB; + auto ldc = ncolB; + const float *APtr = static_cast<const float *>(A->data()); + const float *BPtr = static_cast<const float *>(B->data()); + float *CPtr = static_cast<float *>(C->mutable_data()); + cblas_sgemm(CblasRowMajor, transa, transb, nrowA, ncolB, ncolA, alpha, APtr, + lda, BPtr, ldb, beta, CPtr, ldc); } +#endif // USE_CBLAS + + +} // namespace singa + #endif // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/src/core/tensor/tensor_math_cuda.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h index f26b5a3..26299ba 100644 --- a/src/core/tensor/tensor_math_cuda.h +++ b/src/core/tensor/tensor_math_cuda.h @@ -22,76 +22,129 @@ #ifdef USE_CUDA #include "./tensor_math.h" #include "./math_kernel.h" +#include "singa/utils/cuda_utils.h" #include "singa/core/common.h" namespace singa { // TODO(wangwei) Clean implementations following comments in tensor_math_cpp.h. // TODO(wangwei) optimize using stream -template<> -void Add<float, lang::Cuda>(int count, const Blob* lhs, const Blob* rhs, - Blob* ret, Context* ctx) { - const float* a = static_cast<const float*> (lhs->data()); - const float* b = static_cast<const float*> (rhs->data()); - float* c = static_cast<float*> (ret->mutable_data()); +template <> +void Add<float, lang::Cuda>(int count, const Blob *lhs, const Blob *rhs, + Blob *ret, Context *ctx) { + const float *a = static_cast<const float *>(lhs->data()); + const float *b = static_cast<const float *>(rhs->data()); + float *c = static_cast<float *>(ret->mutable_data()); cuda::add(count, a, b, c); } // TODO(wangwei) optimize using stream -template<> -void Sub<float, lang::Cuda>(int count, const Blob* lhs, const Blob* rhs, - Blob* ret, Context* ctx) { - const float* a = static_cast<const float*> (lhs->data()); - const float* b = static_cast<const float*> (rhs->data()); - float* c = static_cast<float*> (ret->mutable_data()); +template <> +void Sub<float, lang::Cuda>(int count, const Blob *lhs, const Blob *rhs, + Blob *ret, Context *ctx) { + const float *a = static_cast<const float *>(lhs->data()); + const float *b = static_cast<const float *>(rhs->data()); + float *c = static_cast<float *>(ret->mutable_data()); cuda::sub(count, a, b, c); } template <> -void EltwiseMult<float, lang::Cuda>(int count, const Blob* input, float x, - Blob* ret, Context* ctx) -{ - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* lptr = static_cast<const float*>(input->data()); +void EltwiseMult<float, lang::Cuda>(int count, const Blob *input, float x, + Blob *ret, Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *lptr = static_cast<const float *>(input->data()); cuda::mult(count, lptr, x, dptr); } // TODO(wangwei) optimize using stream template <> -void Square<float, lang::Cuda>(int count, const Blob* input, Blob* ret, - Context* ctx) { - const float* in = static_cast<const float*>(input->data()); - float* out = static_cast<float*>(ret->mutable_data()); +void Square<float, lang::Cuda>(int count, const Blob *input, Blob *ret, + Context *ctx) { + const float *in = static_cast<const float *>(input->data()); + float *out = static_cast<float *>(ret->mutable_data()); cuda::square(count, in, out); } + // sum all elements of input into ret // TODO(wangwei) optimize using stream template <> -void Sum<float, lang::Cuda>(int count, const Blob* input, float* ret, - Context* ctx) { - const float* in = static_cast<const float*>(input->data()); +void Sum<float, lang::Cuda>(int count, const Blob *input, float *ret, + Context *ctx) { + const float *in = static_cast<const float *>(input->data()); cuda::sum(count, in, ret); } // TODO(wangwei) optimize using stream template <> -void SumRows<float, lang::Cuda>(int nrow, int ncol, const Blob* input, - Blob* ret, Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* in = static_cast<const float*>(input->data()); +void SumRows<float, lang::Cuda>(int nrow, int ncol, const Blob *input, + Blob *ret, Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *in = static_cast<const float *>(input->data()); cuda::sum_row(nrow, ncol, ncol, in, dptr); } // Sum the rows of the input matrix into a vector // TODO(wangwei) optimize using stream template <> -void SumColumns<float, lang::Cuda>(int nrow, int ncol, const Blob* input, - Blob* ret, Context* ctx) { - float* dptr = static_cast<float*>(ret->mutable_data()); - const float* in = static_cast<const float*>(input->data()); +void SumColumns<float, lang::Cuda>(int nrow, int ncol, const Blob *input, + Blob *ret, Context *ctx) { + float *dptr = static_cast<float *>(ret->mutable_data()); + const float *in = static_cast<const float *>(input->data()); cuda::sum_col(nrow, ncol, ncol, in, dptr); } + +// follow the consistency guide of math API +template <> +void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + cuda::Div(num, alpha, inPtr, outPtr, ctx->stream); } +template <> +void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out, + Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + cuda::Set(num, x, outPtr, ctx->stream); +} +// NOTE: cublas uses column major order. +// http://peterwittek.com/cublas-matrix-c-style.html +template <> +void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow, + const size_t ncol, const Blob *M, const Blob *v, + Blob *out, Context *ctx) { + auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream + const float *MPtr = static_cast<const float *>(M->data()); + const float *vPtr = static_cast<const float *>(v->data()); + float *outPtr = static_cast<float *>(out->mutable_data()); + if (side_right) { + CUBLAS_CHECK(cublasSdgmm(handle, CUBLAS_SIDE_LEFT, ncol, nrow, MPtr, ncol, + vPtr, 1, outPtr, ncol)); + } else { + CUBLAS_CHECK(cublasSdgmm(handle, CUBLAS_SIDE_RIGHT, ncol, nrow, MPtr, ncol, + vPtr, 1, outPtr, ncol)); + } +} +// http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm +template <> +void GEMM<float, lang::Cuda>(const bool transA, const bool transB, + const size_t nrowA, const size_t ncolB, + const size_t ncolA, const float alpha, + const Blob *A, const Blob *B, const float beta, + Blob *C, Context *ctx) { + auto transa = transA ? CUBLAS_OP_T : CUBLAS_OP_N; + auto transb = transB ? CUBLAS_OP_T : CUBLAS_OP_N; + int lda = transA ? nrowA : ncolA; + int ldb = transB ? ncolA : ncolB; + int ldc = ncolB; + const float *APtr = static_cast<const float *>(A->data()); + const float *BPtr = static_cast<const float *>(B->data()); + float *CPtr = static_cast<float *>(C->mutable_data()); + auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream + CUBLAS_CHECK(cublasSgemm(handle, transb, transa, ncolB, nrowA, ncolA, &alpha, + BPtr, ldb, APtr, lda, &beta, CPtr, ldc)); +} +} // namespace singa #endif // USE_CUDA #endif // SINGA_CORE_TENSOR_TENSOR_MATH_CUDA_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_cpp_math.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_cpp_math.cc b/test/singa/test_cpp_math.cc deleted file mode 100644 index 78c713f..0000000 --- a/test/singa/test_cpp_math.cc +++ /dev/null @@ -1,25 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#include "gtest/gtest.h" -#include "../src/core/tensor/tensor_math_cpp.h" - - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_mse.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc index 3ee6bf8..67f583c 100644 --- a/test/singa/test_mse.cc +++ b/test/singa/test_mse.cc @@ -23,7 +23,7 @@ #include "singa/core/tensor.h" #include "singa/core/device.h" #include "../src/model/loss/mse.h" - +#include "singa_config.h" using singa::Tensor; class TestMSE : public ::testing::Test { protected: @@ -39,6 +39,7 @@ class TestMSE : public ::testing::Test { singa::Tensor p, t; }; +#ifdef USE_CBLAS TEST_F(TestMSE, CppForward) { singa::MSE mse; const Tensor& loss = mse.Forward(p, t); @@ -54,6 +55,17 @@ TEST_F(TestMSE, CppForward) { } } +TEST_F(TestMSE, CppBackward) { + singa::MSE mse; + mse.Forward(p, t); + const Tensor& grad = mse.Backward(); + + auto gdat = grad.data<const float*>(); + + for (size_t i = 0; i < grad.Size(); i++) + EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i])); +} +#endif TEST_F(TestMSE, CudaForward) { singa::MSE mse; singa::CudaGPU dev; @@ -73,18 +85,6 @@ TEST_F(TestMSE, CudaForward) { EXPECT_FLOAT_EQ(ldat[i], 0.5 * l); } } - -TEST_F(TestMSE, CppBackward) { - singa::MSE mse; - mse.Forward(p, t); - const Tensor& grad = mse.Backward(); - - auto gdat = grad.data<const float*>(); - - for (size_t i = 0; i < grad.Size(); i++) - EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i])); -} - TEST_F(TestMSE, CudaBackward) { singa::MSE mse; singa::CudaGPU dev; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_tensor.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc index f9acdb0..bd039ad 100644 --- a/test/singa/test_tensor.cc +++ b/test/singa/test_tensor.cc @@ -111,5 +111,3 @@ TEST(TensorClass, T) { EXPECT_EQ(t.shape()[1], o.shape()[0]); } - - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_tensor_math.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc index fb7e3e8..8368c55 100644 --- a/test/singa/test_tensor_math.cc +++ b/test/singa/test_tensor_math.cc @@ -5,10 +5,8 @@ using singa::Shape; using singa::Device; class TestTensorMath : public ::testing::Test { - protected: +protected: virtual void SetUp() { - const float dat1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - const float dat2[] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f}; a.Reshape(singa::Shape{6}); b.Reshape(singa::Shape{6}); c.Reshape(singa::Shape{6, 1}); @@ -18,12 +16,14 @@ class TestTensorMath : public ::testing::Test { b.CopyDataFromHostPtr<float>(dat2, 6); } Tensor a, b, c, d; + const float dat1[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + const float dat2[6] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f}; }; TEST_F(TestTensorMath, MemberAddTensor) { Tensor aa = a.Clone(); aa += a; - const float* dptr = aa.data<const float*>(); + const float *dptr = aa.data<const float *>(); EXPECT_FLOAT_EQ(2.0f, dptr[0]); EXPECT_FLOAT_EQ(4.0f, dptr[1]); EXPECT_FLOAT_EQ(6.0f, dptr[2]); @@ -31,40 +31,467 @@ TEST_F(TestTensorMath, MemberAddTensor) { // check p is initialized to 0 Tensor p(Shape{6}); p += aa; - const float* dptr1 = p.data<const float*>(); + const float *dptr1 = p.data<const float *>(); EXPECT_FLOAT_EQ(2.0f, dptr1[0]); EXPECT_FLOAT_EQ(4.0f, dptr1[1]); EXPECT_FLOAT_EQ(6.0f, dptr1[2]); a += b; - const float* dptr2 = a.data<const float*>(); + const float *dptr2 = a.data<const float *>(); EXPECT_FLOAT_EQ(2.1f, dptr2[0]); EXPECT_FLOAT_EQ(4.1f, dptr2[1]); EXPECT_FLOAT_EQ(6.1f, dptr2[2]); EXPECT_FLOAT_EQ(12.1f, dptr2[5]); } - TEST_F(TestTensorMath, AddTensors) { Tensor ret(a.shape(), a.device(), a.data_type()); Add(a, b, &ret); - const float* dptr = ret.data<const float*>(); + const float *dptr = ret.data<const float *>(); EXPECT_FLOAT_EQ(2.1f, dptr[0]); EXPECT_FLOAT_EQ(4.1f, dptr[1]); EXPECT_FLOAT_EQ(6.1f, dptr[2]); EXPECT_FLOAT_EQ(12.1f, dptr[5]); const Tensor d = a + b; - const float* dptr2 = d.data<const float*>(); + const float *dptr2 = d.data<const float *>(); EXPECT_FLOAT_EQ(2.1f, dptr2[0]); EXPECT_FLOAT_EQ(4.1f, dptr2[1]); EXPECT_FLOAT_EQ(6.1f, dptr2[2]); EXPECT_FLOAT_EQ(12.1f, dptr2[5]); Add(a, b, &a); - const float* dptr1 = a.data<const float*>(); + const float *dptr1 = a.data<const float *>(); EXPECT_FLOAT_EQ(2.1f, dptr1[0]); EXPECT_FLOAT_EQ(4.1f, dptr1[1]); EXPECT_FLOAT_EQ(6.1f, dptr1[2]); EXPECT_FLOAT_EQ(12.1f, dptr1[5]); } + +TEST_F(TestTensorMath, SetValue) { + Tensor t(Shape{4}); + t.SetValue(0.3f); + const float *ptr = t.data<const float *>(); + for (int i = 0; i < 4; i++) + EXPECT_FLOAT_EQ(ptr[i], 0.3f); +} + +TEST_F(TestTensorMath, Reshape) { + Tensor t(Shape{4}); + t.SetValue(0.3f); + Tensor p = Reshape(t, Shape{4, 1}); + const float *ptr = t.data<const float *>(); + EXPECT_EQ(p.shape(0), 4u); + EXPECT_EQ(p.shape(1), 1u); + for (int i = 0; i < 4; i++) + EXPECT_FLOAT_EQ(ptr[i], 0.3f); +} +#ifdef USE_CBLAS +TEST_F(TestTensorMath, MultCpp) { + const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f}; + Tensor t(Shape{2, 2}); + t.CopyDataFromHostPtr(x, 4); + d.CopyDataFromHostPtr(dat1, 6); + Tensor C = Mult(d, t); + const float *xptr = C.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + float tmp = 0; + for (int k = 0; k < 2; k++) { + tmp += dat1[i * 2 + k] * x[k * 2 + j]; + } + EXPECT_FLOAT_EQ(xptr[i * 2 + j], tmp); + } + } + const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f}; + Tensor s(Shape{4, 2}); + s.CopyDataFromHostPtr(y, 8); + const float *sPtr = s.data<const float *>(); + for (int i = 0; i < 8; i++) + EXPECT_FLOAT_EQ(sPtr[i], y[i]); + Tensor D = Mult(d, s.T()); + const float *DPtr = D.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + float tmp = 0; + for (int k = 0; k < 2; k++) { + tmp += dat1[i * 2 + k] * y[j * 2 + k]; + } + EXPECT_FLOAT_EQ(DPtr[i * 4 + j], tmp); + } + } + Tensor p(Shape{4, 1}); + p.CopyDataFromHostPtr(x, 4); + Tensor q(Shape{1, 4}); + q.SetValue(1.0f); + Tensor o(Shape{4, 4}); + + Mult(p, q, &o); + const float *oPtr = o.data<const float *>(); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]); + } + } +} + +TEST_F(TestTensorMath, AddColumnCpp) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + Tensor t(Shape{3}); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + AddColumn(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]); + } + } +} +TEST_F(TestTensorMath, SubColumnCpp) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + Tensor t(Shape{3}); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + SubColumn(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]); + } + } +} + + +TEST_F(TestTensorMath, DivColumnCpp) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + Tensor t(Shape{3}); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + DivColumn(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]); + } + } +} + + +TEST_F(TestTensorMath, AddRowCpp) { + const float x[2] = {1.1f, 2.1f}; + Tensor t(Shape{2}); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + AddRow(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]); + } + } +} + + +TEST_F(TestTensorMath, SubRowCpp) { + const float x[2] = {1.1f, 2.1f}; + Tensor t(Shape{2}); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + SubRow(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]); + } + } +} + + +TEST_F(TestTensorMath, MultRowCpp) { + const float x[2] = {1.1f, 2.1f}; + Tensor t(Shape{2}); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + MultRow(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]); + } + } +} + + +TEST_F(TestTensorMath, SumRowsCpp) { + Tensor t(Shape{2}); + d.CopyDataFromHostPtr(dat1, 6); + SumRows(d, &t); + const float *tptr = t.data<const float *>(); + for (int i = 0; i < 2; i++) { + float tmp = 0; + for (int j = 0; j < 3; j++) { + tmp += dat1[j * 2 + i]; + } + EXPECT_FLOAT_EQ(tptr[i], tmp); + } +} + + +TEST_F(TestTensorMath, SumColumnsCpp) { + Tensor t(Shape{3}); + d.CopyDataFromHostPtr(dat1, 6); + SumColumns(d, &t); + const float *tptr = t.data<const float *>(); + for (int i = 0; i < 3; i++) { + float tmp = 0; + for (int j = 0; j < 2; j++) { + tmp += dat1[i * 2 + j]; + } + EXPECT_FLOAT_EQ(tptr[i], tmp); + } +} +#endif +TEST_F(TestTensorMath, MultCuda) { + const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f}; + singa::CudaGPU dev; + Tensor t(Shape{2, 2}, &dev); + t.CopyDataFromHostPtr(x, 4); + d.ToDevice(&dev); + d.CopyDataFromHostPtr(dat1, 6); + Tensor C = Mult(d, t); + C.ToHost(); + const float *xptr = C.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + float tmp = 0; + for (int k = 0; k < 2; k++) { + tmp += dat1[i * 2 + k] * x[k * 2 + j]; + } + EXPECT_FLOAT_EQ(xptr[i * 2 + j], tmp); + } + } + + const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f}; + Tensor s(Shape{4, 2}, &dev); + s.CopyDataFromHostPtr(y, 8); + Tensor D = Mult(d, s.T()); + D.ToHost(); + const float *DPtr = D.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + float tmp = 0; + for (int k = 0; k < 2; k++) { + tmp += dat1[i * 2 + k] * y[j * 2 + k]; + } + EXPECT_FLOAT_EQ(DPtr[i * 4 + j], tmp); + } + } + Tensor p(Shape{4, 1}, &dev); + p.CopyDataFromHostPtr(x, 4); + Tensor q(Shape{1, 4}, &dev); + q.SetValue(1.0f); + Tensor o(Shape{4, 4}, &dev); + + Mult(p, q, &o); + o.ToHost(); + const float *oPtr = o.data<const float *>(); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]); + } + } +} + +TEST_F(TestTensorMath, AddColumnCuda) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + singa::CudaGPU dev; + Tensor t(Shape{3}, &dev); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + AddColumn(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]); + } + } +} + + +TEST_F(TestTensorMath, SubColumnCuda) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + singa::CudaGPU dev; + Tensor t(Shape{3}, &dev); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + SubColumn(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]); + } + } +} + +TEST_F(TestTensorMath, MultColumnCpp) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + Tensor t(Shape{3}); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + MultColumn(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]); + } + } +} + +TEST_F(TestTensorMath, MultColumnCuda) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + singa::CudaGPU dev; + Tensor t(Shape{3}, &dev); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + MultColumn(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]); + } + } +} +TEST_F(TestTensorMath, DivColumnCuda) { + const float x[3] = {1.0f, 2.0f, 3.0f}; + singa::CudaGPU dev; + Tensor t(Shape{3}, &dev); + t.CopyDataFromHostPtr(x, 3); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + DivColumn(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]); + } + } +} +TEST_F(TestTensorMath, AddRowCuda) { + const float x[2] = {1.1f, 2.1f}; + singa::CudaGPU dev; + Tensor t(Shape{2}, &dev); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + AddRow(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]); + } + } +} +TEST_F(TestTensorMath, SubRowCuda) { + const float x[2] = {1.1f, 2.1f}; + singa::CudaGPU dev; + Tensor t(Shape{2}, &dev); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + SubRow(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]); + } + } +} +TEST_F(TestTensorMath, MultRowCuda) { + const float x[2] = {1.1f, 2.1f}; + singa::CudaGPU dev; + Tensor t(Shape{2}, &dev); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + MultRow(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]); + } + } +} + +TEST_F(TestTensorMath, DivRowCpp) { + const float x[2] = {1.1f, 2.1f}; + Tensor t(Shape{2}); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + DivRow(t, &d); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]); + } + } +} + +TEST_F(TestTensorMath, DivRowCuda) { + const float x[2] = {1.1f, 2.1f}; + singa::CudaGPU dev; + Tensor t(Shape{2}, &dev); + t.CopyDataFromHostPtr(x, 2); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + DivRow(t, &d); + d.ToHost(); + const float *xptr = d.data<const float *>(); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]); + } + } +} +TEST_F(TestTensorMath, SumRowsCuda) { + singa::CudaGPU dev; + Tensor t(Shape{2}, &dev); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + SumRows(d, &t); + t.ToHost(); + const float *tptr = t.data<const float *>(); + for (int i = 0; i < 2; i++) { + float tmp = 0; + for (int j = 0; j < 3; j++) { + tmp += dat1[j * 2 + i]; + } + EXPECT_FLOAT_EQ(tptr[i], tmp); + } +} +TEST_F(TestTensorMath, SumColumnCuda) { + singa::CudaGPU dev; + Tensor t(Shape{3}, &dev); + d.CopyDataFromHostPtr(dat1, 6); + d.ToDevice(&dev); + SumColumns(d, &t); + t.ToHost(); + const float *tptr = t.data<const float *>(); + for (int i = 0; i < 3; i++) { + float tmp = 0; + for (int j = 0; j < 2; j++) { + tmp += dat1[i * 2 + j]; + } + EXPECT_FLOAT_EQ(tptr[i], tmp); + } +}
