SINGA-168 Implement Cpp Math functions APIs

Update the error logs in tensor_math.h to include the function name, e.g. "Foo Not Implemented", so the fatal log identifies which operation is missing.
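For context: tensor_math.h declares every operation as a generic template whose default body fails via LOG(FATAL), and tensor_math_cpp.h overrides it with full specializations for <float, lang::Cpp>. A minimal sketch of that dispatch pattern follows; the Blob/Context stubs and the fprintf/abort stand-in for LOG(FATAL) are illustrative only, not part of the patch.

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Hypothetical stand-ins for singa::Blob and singa::Context.
struct Blob {
  void *buf;
  const void *data() const { return buf; }
  void *mutable_data() { return buf; }
};
struct Context {};
namespace lang { struct Cpp {}; }

// Generic fallback: aborts, and (the point of this commit) names the
// function in the message so the log identifies the missing op.
template <typename DType, typename Lang>
void Abs(const size_t num, const Blob *in, Blob *out, Context *ctx) {
  std::fprintf(stderr, "Abs Not Implemented\n");  // LOG(FATAL) in SINGA
  std::abort();
}

// Full specialization, selected when a caller instantiates
// Abs<float, lang::Cpp>(num, &in, &out, &ctx); mirrors the diff below.
template <>
void Abs<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
                           Context *ctx) {
  const float *inPtr = static_cast<const float *>(in->data());
  float *outPtr = static_cast<float *>(out->mutable_data());
  for (size_t i = 0; i < num; i++) outPtr[i] = std::fabs(inPtr[i]);
}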
Add Tensor Math Cpp Implementation and Test Cases Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/07c49da5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/07c49da5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/07c49da5 Branch: refs/heads/dev Commit: 07c49da5b1ee6582780f5faef6c6bf3418a7a0b6 Parents: 01aaf49 Author: [email protected] <[email protected]> Authored: Fri Jun 3 20:46:16 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Sun Jun 12 12:15:11 2016 +0800 ---------------------------------------------------------------------- src/core/tensor/tensor_math.h | 293 +++++++++---------- src/core/tensor/tensor_math_cpp.h | 508 ++++++++++++++++++++++++--------- test/singa/test_tensor_math.cc | 264 ++++++++++++++++- 3 files changed, 774 insertions(+), 291 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/07c49da5/src/core/tensor/tensor_math.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h index ff865e0..1bf6fc7 100644 --- a/src/core/tensor/tensor_math.h +++ b/src/core/tensor/tensor_math.h @@ -50,277 +50,259 @@ namespace singa { // ================Linear algebra functions==================================== /// ret[i] = |input[i]| template <typename DType, typename Lang> -void Abs(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Abs(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Abs Not Implemented"; } template <typename DType, typename Lang> -void Set(int count, DType x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Set(const size_t num, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "Set Not Implemented"; } + /// sum all elements of input into ret template <typename DType, typename Lang> -void Sum(int count, const Blob *input, DType *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Sum(const size_t num, const Blob *in, DType *out, Context *ctx) { + LOG(FATAL) << "Sum Not Implemented"; } /// ret[i] = sign(input[i]) template <typename DType, typename Lang> -void Sign(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Sign(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Sign Not Implemented"; } /// Base is e, Neper number. ret[i]=exp(input[i]) template <typename DType, typename Lang> -void Exp(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Exp(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Exp Not Implemented"; } /// Natual logarithm, the base is e, Neper number ret[i]=log(input[i]). 
template <typename DType, typename Lang> -void Log(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Log(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Log Not Implemented"; } - /// Element-wise operation, ret[i]=sqrt([input[i]) template <typename DType, typename Lang> -void Sqrt(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Sqrt(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Sqrt Not Implemented"; } /// Element-wise operation, ret[i]=square([input[i]) template <typename DType, typename Lang> -void Square(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Square(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Square Not Implemented"; } /// Element-wise operation, ret[i]=tanh([input[i]) template <typename DType, typename Lang> -void Tanh(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Tanh(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Tanh Not Implemented"; } /// Element-wise operation, ret[i]=max(0, input[i]) template <typename DType, typename Lang> -void ReLU(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void ReLU(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "ReLU Not Implemented"; } /// Element-wise operation, ret[i]=sigmoid([input[i]) template <typename DType, typename Lang> -void Sigmoid(int count, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Sigmoid(const size_t num, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Sigmoid Not Implemented"; } -/// Do softmax for each row invidually +// Do softmax for each row invidually template <typename DType, typename Lang> -void Softmax(int nrow, int ncol, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Softmax(const size_t nrow, const size_t ncol, const Blob *in, + Blob *out, Context *ctx) { + LOG(FATAL) << "Softmax Not Implemented"; } // TODO(wangwei) unify SumRow and SumCol. /// Sum the rows of the input matrix into a vector template <typename DType, typename Lang> -void SumRows(int nrow, int ncol, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void SumRows(const size_t nrow, const size_t ncol, const Blob *in, + Blob *out, Context *ctx) { + LOG(FATAL) << "SumRows Not Implemented"; } /// Sum the columns of the input matrix into a vector template <typename DType, typename Lang> -void SumColumns(int nrow, int ncol, const Blob *input, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void SumColumns(const size_t nrow, const size_t ncol, const Blob *in, + Blob *out, Context *ctx) { + LOG(FATAL) << "SumColumns Not Implemented"; } // TODO(wangwei) unify AddRow and AddCol. 
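The reductions above and the AddRow/AddCol broadcasts declared next all assume a row-major nrow x ncol layout: element (r, c) of the matrix lives at index r * ncol + c. A reference sketch on raw float arrays (hypothetical helper names; Blob/Context plumbing omitted) makes the semantics concrete:

#include <cstddef>

// SumRows collapses each row to one value, so out has nrow entries.
void SumRowsRef(size_t nrow, size_t ncol, const float *in, float *out) {
  for (size_t r = 0; r < nrow; r++) {
    out[r] = 0.f;
    for (size_t c = 0; c < ncol; c++) out[r] += in[r * ncol + c];
  }
}

// SumColumns collapses each column, so out has ncol entries.
void SumColumnsRef(size_t nrow, size_t ncol, const float *in, float *out) {
  for (size_t c = 0; c < ncol; c++) out[c] = 0.f;
  for (size_t r = 0; r < nrow; r++)
    for (size_t c = 0; c < ncol; c++) out[c] += in[r * ncol + c];
}

// AddRow adds a length-ncol vector v to every row of A.
void AddRowRef(size_t nrow, size_t ncol, const float *A, const float *v,
               float *out) {
  for (size_t r = 0; r < nrow; r++)
    for (size_t c = 0; c < ncol; c++)
      out[r * ncol + c] = A[r * ncol + c] + v[c];
}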
-/// Add the vector v to every row of A as the row of ret +/// Add the vector v to every row of A as the row of out template <typename DType, typename Lang> -void AddRow(int nrow, int ncol, const Blob *A, const Blob *v, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void AddRow(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, + Blob *out, Context *ctx) { + LOG(FATAL) << "AddRow Not Implemented"; } -/// Add the vector v to every column of A as the column of ret +/// Add the vector v to every column of A as the column of out template <typename DType, typename Lang> -void AddCol(int nrow, int ncol, const Blob *A, const Blob *v, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void AddCol(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, + Blob *out, Context *ctx) { + LOG(FATAL) << "AddCol Not Implemented"; } /// Element-wise operation, do v^x for every v from the input tensor template <typename DType, typename Lang> -void Pow(int count, const Blob *input, DType x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Pow(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "Pow Not Implemented"; } /// Element-wise operation, do v^x for every v from the lhs and every x from rhs template <typename DType, typename Lang> -void Pow(int count, const Blob *lhs, const Blob *rhs, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Pow(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + LOG(FATAL) << "Pow-Pair Not Implemented"; } /// Element-wise operation, clamp every element into [low, high] /// if x>high, then x=high; if x<low, then x=low. template <typename DType, typename Lang> -void Clamp(int count, DType low, DType high, const Blob *input, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Clamp(const size_t num, const DType low, const DType high, const Blob *in, Blob *out, Context *ctx) { + LOG(FATAL) << "Clamp Not Implemented"; } /// ret = input + x template <typename DType, typename Lang> -void Add(int count, const Blob *input, DType x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Add(const size_t num, const Blob *in, const DType x, + Blob *out, Context *ctx) { + LOG(FATAL) << "Add Not Implemented"; } + +/// ret = lhs + rhs +template <typename DType, typename Lang> +void Add(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + LOG(FATAL) << "Add-Pair Not Implemented"; +} + /// ret = input - x template <typename DType, typename Lang> -void Sub(int count, const Blob *input, DType x, Blob *ret, Context *ctx) { - Add<DType, Lang>(count, input, -x, ret, ctx); +void Sub(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + Add<DType, Lang>(num, in, -x, out, ctx); } -/// ret = input * x + +/// ret = lhs - rhs template <typename DType, typename Lang> -void EltwiseMult(int count, const Blob *input, DType x, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Sub(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + LOG(FATAL) << "Sub-Pair Not Implemented"; } -/// ret = input / x + +/// ret = input * x template <typename DType, typename Lang> -void Div(int count, const Blob *input, DType x, Blob *ret, Context *ctx) { - EltwiseMult<DType, Lang>(count, input, DType(1) / x, ret, ctx); +void EltwiseMult(const size_t num, const Blob *in, const DType x, Blob *out, + Context *ctx) { + LOG(FATAL) << 
"EltwiseMult Not Implemented"; } -/// ret = lhs + rhs +/// ret = lhs * rhs template <typename DType, typename Lang> -void Add(int count, const Blob *lhs, const Blob *rhs, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void EltwiseMult(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + LOG(FATAL) << "EltwiseMult-Pair Not Implemented"; } -/// ret = lhs - rhs +/// ret = input / x template <typename DType, typename Lang> -void Sub(int count, const Blob *lhs, const Blob *rhs, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Div(const size_t num, const DType x, const Blob *in, + Blob *out, Context *ctx) { + LOG(FATAL) << "Div Not Implemented"; } -/// ret = lhs * rhs template <typename DType, typename Lang> -void EltwiseMult(int count, const Blob *lhs, const Blob *rhs, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Div(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + CHECK_NE(x,0.f); + EltwiseMult<DType, Lang>(num, in, DType(1) / x, out, ctx); } /// ret = lhs / rhs template <typename DType, typename Lang> -void Div(int count, const Blob *lhs, const Blob *rhs, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Div(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + LOG(FATAL) << "Div-Pair Not Implemented"; } /// outer-product. /// lhs and rhs are vectors of len m and n. ret is matrix of shape m * n template <typename DType, typename Lang> -void Outer(int m, int n, const Blob *lhs, const Blob *rhs, Blob *ret, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Outer(const size_t m, const size_t n, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + LOG(FATAL) << "Outer Not Implemented"; } /// ret[i]=(input[i]<x)?1.f:0.f template <typename DType, typename Lang> -void LT(int count, const Blob *input, float x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void LT(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "LT Not Implemented"; } /// ret[i]=(input[i]<=x)?1.f:0.f template <typename DType, typename Lang> -void LE(int count, const Blob *input, float x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void LE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "LE Not Implemented"; } /// ret[i]=(input[i]>x)?1.f:0.f template <typename DType, typename Lang> -void GT(int count, const Blob *input, float x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void GT(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "GT Not Implemented"; } -/// ret[i]=(input[i]>x)?1.f:0.f +/// ret[i]=(input[i]>=x)?1.f:0.f template <typename DType, typename Lang> -void GE(int count, const Blob *input, float x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void GE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "GE Not Implemented"; } // ===== BLAS functions, ref to http://docs.nvidia.com/cuda/cublas // ===== Level 1 /// return the index of the element with the max value. template <typename DType, typename Lang> -void Amax(int count, const Blob *input, int *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Amax(const size_t num, const Blob *in, size_t *out, Context *ctx) { + LOG(FATAL) << "Amax Not Implemented"; } /// return the index of the element with the min value. 
template <typename DType, typename Lang> -void Amin(int count, const Blob *input, int *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Amin(const size_t num, const Blob *in, size_t *out, Context *ctx) { + LOG(FATAL) << "Amin Not Implemented"; } /// ret = sum |x| for all x in input template <typename DType, typename Lang> -void Asum(int count, const Blob *input, DType *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Asum(const size_t num, const Blob *in, DType *out, Context *ctx) { + LOG(FATAL) << "Asum Not Implemented"; } /// ret = alpha * input + ret template <typename DType, typename Lang> -void Axpy(int count, DType alpha, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Axpy(const size_t num, const DType alpha, const Blob *in, + Blob *out, Context *ctx) { + LOG(FATAL) << "Axpy Not Implemented"; } /// ret *= x template <typename DType, typename Lang> -void Scale(int count, DType x, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Scale(const size_t num, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "Scale Not Implemented"; } template <typename DType, typename Lang> -void Dot(const size_t num, const Blob *in1, const Blob *in2, DType *out, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Dot(const size_t num, const Blob *in1, const Blob *in2, + DType *out, Context *ctx) { + LOG(FATAL) << "Dot Not Implemented"; } // ===== Level 2 /// ret = alpha * op(A) * v + beta * ret. /// op(A) = A if trans = false; A^T otherwise; rows(op(A)) = m, cols(op(A)) = n. template <typename DType, typename Lang> -void GEMV(bool trans, int m, int n, DType alpha, const Blob *A, const Blob *v, - DType beta, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} - -// ===== Level 3 - -// ================Random functions=========================================== -/// Each element of ret would be 1 with prob p and 0 with 1-p. 0<= p <= 1 -// Get the random generator from 'ctx' -// If DType is not float, then convert the threshold to DType -template <typename DType, typename Lang> -void Bernoulli(int count, float p, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} -// The random generator should be extracted from ctx. -// If DType is not float, then convert the low and high to DType -template <typename DType, typename Lang> -void Uniform(int count, float low, float high, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} -// The random generator should be extracted from ctx. -// If DType is not float, then convert the mean and std to DType -template <typename DType, typename Lang> -void Gaussian(int count, float mean, float std, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} - -// ========follow the consistency guide of math API - -template <typename DType, typename Lang> -void Set(const size_t num, const DType x, Blob *out, Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} -/// Divide alpha by each element of 'in'. -template <typename DType, typename Lang> -void Div(const size_t num, const DType alpha, const Blob *in, Blob *out, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void GEMV(bool trans, const size_t m, const size_t n, const DType alpha, + const Blob *A, const Blob *v, + const DType beta, Blob *out, Context *ctx) { + LOG(FATAL) << "GEMV Not Implemented"; } /// multiply a matrix with a diagnoal matrix constructed using values from 'v'. 
@@ -328,7 +310,7 @@ void Div(const size_t num, const DType alpha, const Blob *in, Blob *out, template <typename DType, typename Lang> void DGMM(const bool side_right, const size_t nrow, const size_t ncol, const Blob *M, const Blob *v, Blob *out, Context *ctx) { - LOG(FATAL) << "Not Implemented"; + LOG(FATAL) << "DGMM Not Implemented"; } /// C = alpha * A * B + beta * C. @@ -338,32 +320,37 @@ void GEMM(const bool transA, const bool transB, const size_t nrowA, const size_t ncolB, const size_t ncolA, const DType alpha, const Blob *A, const Blob *B, const DType beta, Blob *C, Context *ctx) { - LOG(FATAL) << "Not Implemented"; + LOG(FATAL) << "GEMM Not Implemented"; } -/// ret[i]=(input[i]<x)?1.f:0.f -template <typename DType, typename Lang> -void LT(const size_t num, const Blob *in, const DType x, Blob *out, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} -/// ret[i]=(input[i]<=x)?1.f:0.f + + +// ===== Level 3 + +// ================Random functions=========================================== +/// Each element of ret would be 1 with prob p and 0 with 1-p. 0<= p <= 1 +// Get the random generator from 'ctx' +// If DType is not float, then convert the threshold to DType template <typename DType, typename Lang> -void LE(const size_t num, const Blob *in, const DType x, Blob *out, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Bernoulli(const size_t num, const float p, Blob *out, Context *ctx) { + LOG(FATAL) << "Bernoulli Not Implemented"; } -/// ret[i]=(input[i]>x)?1.f:0.f +// The random generator should be extracted from ctx. +// If DType is not float, then convert the low and high to DType template <typename DType, typename Lang> -void GT(const size_t num, const Blob *in, const DType x, Blob *out, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Uniform(const size_t num, const float low, const float high, + Blob *out, Context *ctx) { + LOG(FATAL) << "Uniform Not Implemented"; } -/// ret[i]=(input[i]>=x)?1.f:0.f +// The random generator should be extracted from ctx. +// If DType is not float, then convert the mean and std to DType template <typename DType, typename Lang> -void GE(const size_t num, const Blob *in, const DType x, Blob *out, - Context *ctx) { - LOG(FATAL) << "Not Implemented"; +void Gaussian(const size_t num, const float mean, const float std, + Blob *out, Context *ctx) { + LOG(FATAL) << "Gaussian Not Implemented"; } + + + } // namespace singa #endif // SINGA_CORE_MATH_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/07c49da5/src/core/tensor/tensor_math_cpp.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h index 693f09c..ec7a892 100644 --- a/src/core/tensor/tensor_math_cpp.h +++ b/src/core/tensor/tensor_math_cpp.h @@ -27,195 +27,317 @@ /// TODO(wangwei) Clean the implementations following the comments in /// tensor_math.h. -/// For Blob argument xxx, name its pointer as xxxPtr. 
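One detail to watch in the specializations that follow: Log guards its inputs with CHECK_GT(inPtr[i], 0.f), which is correct since log(0) is undefined, but Sqrt reuses CHECK_GT even though sqrt(0.f) is perfectly well defined, so a zero input (e.g. a ReLU output) would abort. CHECK_GE is arguably the intended guard; a sketch of that variant, with assert() standing in for the glog macro and raw pointers for the Blob plumbing:

#include <cassert>
#include <cmath>
#include <cstddef>

// Variant of the Sqrt kernel that admits zero inputs.
void SqrtRef(size_t num, const float *inPtr, float *outPtr) {
  for (size_t i = 0; i < num; i++) {
    assert(inPtr[i] >= 0.f);  // CHECK_GE(inPtr[i], 0.f) in SINGA terms
    outPtr[i] = std::sqrt(inPtr[i]);
  }
}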
namespace singa { + +template<> +void Abs<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = fabs(inPtr[i]); + } +} + template <> -void Square<float, lang::Cpp>(int count, const Blob *input, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *in = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = in[i] * in[i]; +void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + for (size_t i = 0; i < num; i++) outPtr[i] = x; +} + +// sum all elements of input into out +// TODO(wangwei) optimize using omp +template <> +void Sum<float, lang::Cpp>(const size_t num, const Blob *in, float *out, Context *ctx) { + float s = 0.f; + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + s += inPtr[i]; } + *out = s; } template <> -void Add<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, - Blob *ret, Context *ctx) { - // CHECK_EQ(ctx->stream, nullptr); - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(lhs->data()); - const float *rptr = static_cast<const float *>(rhs->data()); - for (int i = 0; i < count; i++) { - dptr[i] = lptr[i] + rptr[i]; +void Sign<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float*>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = inPtr[i] > 0 ? 
1.0f : 0.0f; } } template <> -void Add<float, lang::Cpp>(int count, const Blob *input, float x, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = lptr[i] + x; +void Exp<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = exp(inPtr[i]); } } template <> -void Sub<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, - Blob *ret, Context *ctx) { - // CHECK_EQ(ctx->stream, nullptr); - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(lhs->data()); - const float *rptr = static_cast<const float *>(rhs->data()); - for (int i = 0; i < count; i++) { - dptr[i] = lptr[i] - rptr[i]; +void Log<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + CHECK_GT(inPtr[i], 0.f); + outPtr[i] = log(inPtr[i]); } } -// sum all elements of input into ret -// TODO(wangwei) optimize using omp template <> -void Sum<float, lang::Cpp>(int count, const Blob *input, float *ret, - Context *ctx) { - float s = 0.f; - const float *in = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - s += in[i]; +void Sqrt<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + CHECK_GT(inPtr[i], 0.f); + outPtr[i] = sqrt(inPtr[i]); } - *ret = s; } template <> -void EltwiseMult<float, lang::Cpp>(int count, const Blob *input, float x, - Blob *ret, Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = lptr[i] * x; +void Square<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = inPtr[i] * inPtr[i]; } } template <> -void EltwiseMult<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, - Blob *ret, Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(lhs->data()); - const float *rptr = static_cast<const float *>(rhs->data()); - for (int i = 0; i < count; i++) { - dptr[i] = lptr[i] * rptr[i]; +void Tanh<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = tanh(inPtr[i]); } } template <> -void Exp<float, lang::Cpp>(int count, const Blob *input, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = exp(lptr[i]); +void ReLU<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context 
*ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = (inPtr[i] >= 0.f) ? inPtr[i] : 0.f; } } template <> -void Log<float, lang::Cpp>(int count, const Blob *input, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - CHECK_GT(lptr[i], 0.f); - dptr[i] = log(lptr[i]); +void Sigmoid<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = 1.f / (1.f + exp(-inPtr[i])); } } template <> -void Tanh<float, lang::Cpp>(int count, const Blob *input, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = tanh(lptr[i]); +void Softmax<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + float *bPtr = new float[ncol]; + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + float denom = 0.f; + for (size_t c = 0; c < ncol; c++) { + bPtr[c] = exp(inPtr[offset + c]); + denom += bPtr[c]; + } + for (size_t c = 0; c < ncol; c++) { + size_t idx = offset + c; + outPtr[idx] = bPtr[c] / denom; + } } + delete bPtr; } template <> -void ReLU<float, lang::Cpp>(int count, const Blob *input, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = (lptr[i] >= 0.f) ? 
lptr[i] : 0.f; +void SumRows<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + outPtr[r] = 0.f; + for (size_t c = 0; c < ncol; c++) { + outPtr[r] += inPtr[offset + c]; + } + } +} + +template <> +void SumColumns<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *in, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t c = 0; c < ncol; c++) { + outPtr[c] = 0.f; + } + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + for (size_t c = 0; c < ncol; c++) { + outPtr[c] += inPtr[offset + c]; + } + } +} + +template <> +void AddRow<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *APtr = static_cast<const float *>(A->data()); + const float *vPtr = static_cast<const float *>(v->data()); + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + for (size_t c = 0; c < ncol; c++) { + outPtr[offset + c] = APtr[offset + c] + vPtr[c]; + } + } +} + +template <> +void AddCol<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *APtr = static_cast<const float *>(A->data()); + const float *vPtr = static_cast<const float *>(v->data()); + for (size_t r = 0; r < nrow; r++) { + size_t offset = r * ncol; + for (size_t c = 0; c < ncol; c++) { + outPtr[offset + c] = APtr[offset + c] + vPtr[r]; + } + } +} + +template <> +void Pow<float, lang::Cpp>(const size_t num, const Blob *in, const float x, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = pow(inPtr[i], x); } } template <> -void Sigmoid<float, lang::Cpp>(int count, const Blob *input, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = 1.f / (1.f + exp(-lptr[i])); +void Pow<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + float *outPtr= static_cast<float *>(out->mutable_data()); + const float *in1Ptr= static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = pow(in1Ptr[i], in2Ptr[i]); } } template <> -void Pow<float, lang::Cpp>(int count, const Blob *input, float x, Blob *ret, - Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(input->data()); - for (int i = 0; i < count; i++) { - dptr[i] = pow(lptr[i], x); +void Clamp<float, lang::Cpp>(const size_t num, const float low, const float high, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + if (inPtr[i] > high) { + outPtr[i] = high; + } + else if (inPtr[i] < low) { + outPtr[i] = low; 
+ } + else { + outPtr[i] = inPtr[i]; + } + } +} + +template <> +void Add<float, lang::Cpp>(const size_t num, const Blob *in, const float x, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = inPtr[i] + x; } } template <> -void Pow<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs, - Blob *ret, Context *ctx) { - float *dptr = static_cast<float *>(ret->mutable_data()); - const float *lptr = static_cast<const float *>(lhs->data()); - const float *rptr = static_cast<const float *>(rhs->data()); - for (int i = 0; i < count; i++) { - dptr[i] = pow(lptr[i], rptr[i]); +void Add<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + // CHECK_EQ(ctx->stream, nullptr); + float *outPtr= static_cast<float *>(out->mutable_data()); + const float *in1Ptr = static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = in1Ptr[i] + in2Ptr[i]; } } template <> -void Bernoulli<float, lang::Cpp>(int count, float p, Blob *ret, Context *ctx) { - std::bernoulli_distribution distribution(p); - float *ptr = static_cast<float *>(ret->mutable_data()); - for (int i = 0; i < count; i++) { - ptr[i] = distribution(ctx->random_generator) ? 1.0f : 0.0f; +void Sub<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + // CHECK_EQ(ctx->stream, nullptr); + float *outPtr= static_cast<float *>(out->mutable_data()); + const float *in1Ptr = static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = in1Ptr[i] - in2Ptr[i]; } } template <> -void Uniform<float, lang::Cpp>(int count, float low, float high, Blob *ret, - Context *ctx) { - std::uniform_real_distribution<float> distribution(low, high); - float *ptr = static_cast<float *>(ret->mutable_data()); - for (int i = 0; i < count; i++) { - ptr[i] = static_cast<float>(distribution(ctx->random_generator)); +void EltwiseMult<float, lang::Cpp>(const size_t num, const Blob *in, const float x, + Blob *out, Context *ctx) { + float *outPtr= static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = inPtr[i] * x; } } template <> -void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob *ret, - Context *ctx) { - std::normal_distribution<float> distribution(mean, std); - float *ptr = static_cast<float *>(ret->mutable_data()); - for (int i = 0; i < count; i++) { - ptr[i] = static_cast<float>(distribution(ctx->random_generator)); +void EltwiseMult<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + float *outPtr= static_cast<float *>(out->mutable_data()); + const float *in1Ptr = static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = in1Ptr[i] * in2Ptr[i]; } } -// follow the consistency guide of math API template <> -void Div<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in, - Blob *out, Context *ctx) { - float *outPtr = static_cast<float *>(out->mutable_data()); +void Div<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + float 
*outPtr= static_cast<float *>(out->mutable_data()); + const float *in1Ptr = static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + for (size_t i = 0; i < num; i++) { + CHECK_NE(in2Ptr[i],0.f); + outPtr[i] = in1Ptr[i] / in2Ptr[i]; + } +} + +template <> +void Div<float, lang::Cpp>(const size_t num, const float x, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr= static_cast<float *>(out->mutable_data()); const float *inPtr = static_cast<const float *>(in->data()); - for (size_t i = 0; i < num; i++) outPtr[i] = alpha / inPtr[i]; + for (size_t i = 0; i < num; i++) { + CHECK_NE(inPtr[i],0.f); + outPtr[i] = x / inPtr[i]; + } } + +template <> +void Outer<float, lang::Cpp>(const size_t m, const size_t n, const Blob *in1, const Blob *in2, + Blob *out, Context *ctx) { + float *outPtr= static_cast<float *>(out->mutable_data()); + const float *in1Ptr = static_cast<const float *>(in1->data()); + const float *in2Ptr = static_cast<const float *>(in2->data()); + for (size_t r = 0; r < m ; r++) { + size_t offset = r * n; + for (size_t c = 0; c < n; c++) { + outPtr[offset + c] = in1Ptr[r] * in2Ptr[c]; + } + } +} + template <> void LT<float, lang::Cpp>(const size_t num, const Blob *in, const float x, Blob *out, Context *ctx) { @@ -227,6 +349,125 @@ void LT<float, lang::Cpp>(const size_t num, const Blob *in, const float x, } template <> +void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = (inPtr[i] <= x) ? 1.f : 0.f; + } +} + +template <> +void GT<float, lang::Cpp>(const size_t num, const Blob *in, const float x, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = (inPtr[i] > x) ? 1.f : 0.f; + } +} + +template <> +void GE<float, lang::Cpp>(const size_t num, const Blob *in, const float x, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] = (inPtr[i] >= x) ? 
1.f : 0.f; + } +} + +template <> +void Amax<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out, Context *ctx) { + size_t maxPos = 0; + float maxVal = 0; + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + if (i == 0) { + maxVal = inPtr[i]; + } + else if (inPtr[i] > maxVal) { + maxVal = inPtr[i]; + maxPos = i; + } + } + *out = maxPos; +} + +template <> +void Amin<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out, Context *ctx) { + size_t minPos = 0; + float minVal = 0; + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + if (i == 0) { + minVal = inPtr[i]; + } + else if (inPtr[i] > minVal) { + minVal = inPtr[i]; + minPos = i; + } + } + *out = minPos; +} + +template <> +void Asum<float, lang::Cpp>(const size_t num, const Blob *in, float *out, Context *ctx) { + float sum = 0; + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + sum += fabs(inPtr[i]); + } +} + +template <> +void Axpy<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float *inPtr = static_cast<const float *>(in->data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] += alpha * inPtr[i]; + } +} + +template <> +void Scale<float, lang::Cpp>(const size_t num, const float x, Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + for (size_t i = 0; i < num; i++) { + outPtr[i] *= x; + } +} + +//template <> +//void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, +// float *out, Context *ctx) { +// float sum = 0; +// const float *in1Ptr = static_cast<const float *>(in1->data()); +// const float *in2Ptr = static_cast<const float *>(in2->data()); +// for (size_t i = 0; i < num; i++) { +// sum += in1Ptr[i] * in2Ptr[i]; +// } +//} + +template <> +void GEMV<float, lang::Cpp>(bool trans, const size_t m, const size_t n, const float alpha, + const Blob *A, const Blob *v, const float beta, + Blob *out, Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + const float* APtr = static_cast<const float *>(A->data()); + const float* vPtr = static_cast<const float *>(v->data()); + for (size_t r = 0; r < m; r++) { + float sum = 0; + for (size_t c = 0; c < n; c++) { + size_t idx = trans ? c * m + r : r * n + c; + sum += APtr[idx] * vPtr[c]; + } + outPtr[r] = alpha * sum + beta * outPtr[r]; + } +} + +template <> void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow, const size_t ncol, const Blob *M, const Blob *v, Blob *out, Context *ctx) { @@ -251,41 +492,35 @@ void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow, } template <> -void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out, - Context *ctx) { - float *outPtr = static_cast<float *>(out->mutable_data()); - for (size_t i = 0; i < num; i++) outPtr[i] = x; -} -template <> -void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x, - Blob *out, Context *ctx) { +void Bernoulli<float, lang::Cpp>(const size_t num, const float p, Blob *out, Context *ctx) { + std::bernoulli_distribution distribution(p); float *outPtr = static_cast<float *>(out->mutable_data()); - const float *inPtr = static_cast<const float *>(in->data()); for (size_t i = 0; i < num; i++) { - outPtr[i] = (inPtr[i] <= x) ? 1.f : 0.f; + outPtr[i] = distribution(ctx->random_generator) ? 
1.0f : 0.0f; } } template <> -void GT<float, lang::Cpp>(const size_t num, const Blob *in, const float x, - Blob *out, Context *ctx) { - float *outPtr = static_cast<float *>(out->mutable_data()); - const float *inPtr = static_cast<const float *>(in->data()); +void Uniform<float, lang::Cpp>(const size_t num, const float low, const float high, Blob *out, + Context *ctx) { + std::uniform_real_distribution<float> distribution(low, high); + float *outPtr= static_cast<float *>(out->mutable_data()); for (size_t i = 0; i < num; i++) { - outPtr[i] = (inPtr[i] > x) ? 1.f : 0.f; + outPtr[i] = static_cast<float>(distribution(ctx->random_generator)); } } template <> -void GE<float, lang::Cpp>(const size_t num, const Blob *in, const float x, - Blob *out, Context *ctx) { +void Gaussian<float, lang::Cpp>(const size_t num, const float mean, const float std, Blob *out, + Context *ctx) { + std::normal_distribution<float> distribution(mean, std); float *outPtr = static_cast<float *>(out->mutable_data()); - const float *inPtr = static_cast<const float *>(in->data()); for (size_t i = 0; i < num; i++) { - outPtr[i] = (inPtr[i] >= x) ? 1.f : 0.f; + outPtr[i] = static_cast<float>(distribution(ctx->random_generator)); } } + #ifdef USE_CBLAS template <> void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2, @@ -314,7 +549,6 @@ void GEMM<float, lang::Cpp>(const bool transA, const bool transB, } #endif // USE_CBLAS - } // namespace singa #endif // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/07c49da5/test/singa/test_tensor_math.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc index 170b96c..823445f 100644 --- a/test/singa/test_tensor_math.cc +++ b/test/singa/test_tensor_math.cc @@ -11,15 +11,277 @@ protected: b.Reshape(singa::Shape{6}); c.Reshape(singa::Shape{6, 1}); d.Reshape(singa::Shape{3, 2}); + e.Reshape(singa::Shape{3, 2}); a.CopyDataFromHostPtr<float>(dat1, 6); b.CopyDataFromHostPtr<float>(dat2, 6); + e.CopyDataFromHostPtr<float>(dat1, 6); } - Tensor a, b, c, d; + Tensor a, b, c, d, e; const float dat1[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; const float dat2[6] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f}; }; +TEST_F(TestTensorMath, MemberAbs) { + Tensor aa = a.Clone(); + Tensor bb = b.Clone(); + Tensor cc = aa - bb; + const float* dptr = cc.data<const float*>(); + EXPECT_NEAR(-0.1, dptr[0], 1e-5); + EXPECT_NEAR(-0.1, dptr[1], 1e-5); + EXPECT_NEAR(-0.1, dptr[2], 1e-5); + + Tensor p = Abs(cc); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(0.1, dptr1[0], 1e-5); + EXPECT_NEAR(0.1, dptr1[1], 1e-5); + EXPECT_NEAR(0.1, dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberExp) { + Tensor p = Exp(a); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(exp(1.0f), dptr1[0], 1e-5); + EXPECT_NEAR(exp(2.0f), dptr1[1], 1e-5); + EXPECT_NEAR(exp(3.0f), dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberLog) { + Tensor p = Log(a); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(log(1.0f), dptr1[0], 1e-5); + EXPECT_NEAR(log(2.0f), dptr1[1], 1e-5); + EXPECT_NEAR(log(3.0f), dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberReLU) { + Tensor aa = a.Clone(); + Tensor cc = aa - 2.0f; + const float* dptr = cc.data<const float*>(); + EXPECT_NEAR(-1.0f, dptr[0], 1e-5); + EXPECT_NEAR(0.0f, dptr[1], 1e-5); + EXPECT_NEAR(1.0f, dptr[2], 1e-5); + + Tensor p = ReLU(cc); + const float* dptr1 = p.data<const float*>(); + 
EXPECT_NEAR(0.0f, dptr1[0], 1e-5); + EXPECT_NEAR(0.0f, dptr1[1], 1e-5); + EXPECT_NEAR(1.0f, dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberSigmoid) { + Tensor p = Sigmoid(a); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(1.0f/(1.0f + exp(-1.0f)), dptr1[0], 1e-5); + EXPECT_NEAR(1.0f/(1.0f + exp(-2.0f)), dptr1[1], 1e-5); + EXPECT_NEAR(1.0f/(1.0f + exp(-3.0f)), dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberSign) { + Tensor aa = a.Clone(); + Tensor cc = aa - 2.0f; + const float* dptr = cc.data<const float*>(); + EXPECT_NEAR(-1.0f, dptr[0], 1e-5); + EXPECT_NEAR(0.0f, dptr[1], 1e-5); + EXPECT_NEAR(1.0f, dptr[2], 1e-5); + + Tensor p = Sign(cc); + const float* dptr1 = p.data<const float*>(); + EXPECT_EQ(0.0f, dptr1[0]); + EXPECT_EQ(0.0f, dptr1[1]); + EXPECT_EQ(1.0f, dptr1[2]); +} + +TEST_F(TestTensorMath, MemberSqrt) { + Tensor p = Sqrt(a); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(sqrt(1.0), dptr1[0], 1e-5); + EXPECT_NEAR(sqrt(2.0), dptr1[1], 1e-5); + EXPECT_NEAR(sqrt(3.0), dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberSquare) { + Tensor p = Square(a); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(1.0, dptr1[0], 1e-5); + EXPECT_NEAR(4.0, dptr1[1], 1e-5); + EXPECT_NEAR(9.0, dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberTanh) { + Tensor p = Tanh(a); + const float* dptr1 = p.data<const float*>(); + EXPECT_NEAR(tanh(1.0), dptr1[0], 1e-5); + EXPECT_NEAR(tanh(2.0), dptr1[1], 1e-5); + EXPECT_NEAR(tanh(3.0), dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, Sum) { + Tensor p1(Shape{1,2}); + p1 = Sum(e, 0); + const float *dptr1 = p1.data<const float *>(); + EXPECT_FLOAT_EQ(9.0f,dptr1[0]); + EXPECT_FLOAT_EQ(12.0f,dptr1[1]); + + Tensor p2(Shape{3,1}); + p2 = Sum(e, 1); + const float *dptr2 = p2.data<const float *>(); + EXPECT_FLOAT_EQ(3.0f,dptr2[0]); + EXPECT_FLOAT_EQ(7.0f,dptr2[1]); + EXPECT_FLOAT_EQ(11.0f,dptr2[2]); +} + +TEST_F(TestTensorMath, SoftMax) { + Tensor p1(Shape{3,2}); + p1 = SoftMax(e,0); + const float *dptr1 = p1.data<const float *>(); + float sum = 0; + for(int i = 0; i < 6; i++) sum += exp(i+1); + EXPECT_NEAR(exp(1)/sum, dptr1[0],1e-5); + EXPECT_NEAR(exp(3)/sum, dptr1[2],1e-5); + EXPECT_NEAR(exp(5)/sum, dptr1[4],1e-5); + EXPECT_NEAR(exp(2)/sum, dptr1[1],1e-5); + EXPECT_NEAR(exp(4)/sum, dptr1[3],1e-5); + EXPECT_NEAR(exp(6)/sum, dptr1[5],1e-5); + + Tensor p2(Shape{3,2}); + p2 = SoftMax(e,1); + const float *dptr2 = p2.data<const float *>(); + EXPECT_NEAR(exp(1)/(exp(1)+exp(2)),dptr2[0], 1e-5); + EXPECT_NEAR(exp(2)/(exp(1)+exp(2)),dptr2[1], 1e-5); +} + +TEST_F(TestTensorMath, MemberLT) { + Tensor p1 = a < 2.0f; + const float *dptr1 = p1.data<const float *>(); + EXPECT_FLOAT_EQ(1.0f, dptr1[0]); + EXPECT_FLOAT_EQ(0.0f, dptr1[1]); + EXPECT_FLOAT_EQ(0.0f, dptr1[2]); +} + +TEST_F(TestTensorMath, MemberLE) { + Tensor p1 = a <= 2.0f; + const float *dptr1 = p1.data<const float *>(); + EXPECT_FLOAT_EQ(1.0f, dptr1[0]); + EXPECT_FLOAT_EQ(1.0f, dptr1[1]); + EXPECT_FLOAT_EQ(0.0f, dptr1[2]); +} + +TEST_F(TestTensorMath, MemberGT) { + Tensor p1 = a > 2.0f; + const float *dptr1 = p1.data<const float *>(); + EXPECT_FLOAT_EQ(0.0f, dptr1[0]); + EXPECT_FLOAT_EQ(0.0f, dptr1[1]); + EXPECT_FLOAT_EQ(1.0f, dptr1[2]); +} + +TEST_F(TestTensorMath, MemberGE) { + Tensor p1 = a >= 2.0f; + const float *dptr1 = p1.data<const float *>(); + EXPECT_FLOAT_EQ(0.0f, dptr1[0]); + EXPECT_FLOAT_EQ(1.0f, dptr1[1]); + EXPECT_FLOAT_EQ(1.0f, dptr1[2]); +} + +TEST_F(TestTensorMath, MemberPow) { + Tensor p1 = Pow(b,3.0f); + const float *dptr1 = p1.data<const float *>(); + 
EXPECT_FLOAT_EQ(pow(1.1f,3.0f), dptr1[0]); + EXPECT_FLOAT_EQ(pow(2.1f,3.0f), dptr1[1]); + EXPECT_FLOAT_EQ(pow(3.1f,3.0f), dptr1[2]); + + //TODO(Yuchen): check pow(tensor a, tensor b) and add testcase after the function is complete + //Tensor p2 = Pow(a,b); + //const float *dptr2 = p2.data<const float *>(); + //EXPECT_FLOAT_EQ(pow(1.0f,1.1f), dptr2[0]); + //EXPECT_FLOAT_EQ(pow(2.0f,2.1f), dptr2[1]); + //EXPECT_FLOAT_EQ(pow(3.0f,3.1f), dptr2[2]); +} + + +TEST_F(TestTensorMath, MemberSub) { + Tensor p1 = a - b; + const float* dptr1 = p1.data<const float*>(); + EXPECT_NEAR(-0.1, dptr1[0], 1e-5); + EXPECT_NEAR(-0.1, dptr1[1], 1e-5); + EXPECT_NEAR(-0.1, dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberEltwiseMult) { + Tensor p1 = a * b; + const float* dptr1 = p1.data<const float*>(); + EXPECT_NEAR(1.0*1.1, dptr1[0], 1e-5); + EXPECT_NEAR(2.0*2.1, dptr1[1], 1e-5); + EXPECT_NEAR(3.0*3.1, dptr1[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberDiv) { + Tensor p1 = a / b; + const float* dptr1 = p1.data<const float*>(); + EXPECT_NEAR(1.0/1.1, dptr1[0], 1e-5); + EXPECT_NEAR(2.0/2.1, dptr1[1], 1e-5); + EXPECT_NEAR(3.0/3.1, dptr1[2], 1e-5); + + Tensor p2 = Div(10.0f,b); + const float* dptr2 = p2.data<const float*>(); + EXPECT_NEAR(10.0/1.1, dptr2[0], 1e-5); + EXPECT_NEAR(10.0/2.1, dptr2[1], 1e-5); + EXPECT_NEAR(10.0/3.1, dptr2[2], 1e-5); + + Tensor p3 = a / 8.0f; + const float* dptr3 = p3.data<const float*>(); + EXPECT_NEAR(1.0/8.0, dptr3[0], 1e-5); + EXPECT_NEAR(2.0/8.0, dptr3[1], 1e-5); + EXPECT_NEAR(3.0/8.0, dptr3[2], 1e-5); +} + +TEST_F(TestTensorMath, MemberBernoulli) { + Tensor p1(Shape{10000}); + Bernoulli(0.3,&p1); + const float* dptr1 = p1.data<const float*>(); + float sum = 0; + for(int i = 0; i < 10000; i++) sum += dptr1[i]; + float mean = sum/10000; + EXPECT_NEAR(mean, 0.3, 1e-2); + + sum = 0; + for(int i = 0; i < 10000; i++) sum += (dptr1[i]-mean)*(dptr1[i]-mean); + float variance = sum/9999; + EXPECT_NEAR(variance, 0.3*0.7, 1e-2); +} + +TEST_F(TestTensorMath, MemberUniform) { + Tensor p1(Shape{10000}); + Uniform(0.1f,0.2f,&p1); + const float* dptr1 = p1.data<const float*>(); + float sum = 0; + for(int i = 0; i < 10000; i++) sum += dptr1[i]; + float mean = sum/10000; + EXPECT_NEAR(mean, 0.15f, 1e-3); + + sum = 0; + for(int i = 0; i < 10000; i++) sum += (dptr1[i]-mean)*(dptr1[i]-mean); + float variance = sum/9999; + EXPECT_NEAR(variance, 0.01f/12, 1e-3); +} + +TEST_F(TestTensorMath, MemberGaussian) { + Tensor p1(Shape{50000}); + Gaussian(0.0,1.0,&p1); + const float* dptr1 = p1.data<const float*>(); + float sum = 0; + for(int i = 0; i < 50000; i++) sum += dptr1[i]; + float mean = sum/50000; + EXPECT_NEAR(mean, 0.0, 1e-2); + + sum = 0; + for(int i = 0; i < 50000; i++) sum += (dptr1[i]-mean)*(dptr1[i]-mean); + float variance = sum/49999; + EXPECT_NEAR(variance, 1.0, 1e-2); +} + + + TEST_F(TestTensorMath, MemberAddTensor) { Tensor aa = a.Clone(); aa += a;
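A few of the new <float, lang::Cpp> kernels above look like oversights worth a follow-up patch. Amin compares with '>' and therefore tracks the maximum; Asum accumulates its sum but never stores it through the output pointer; and Softmax releases its new float[ncol] scratch buffer with delete rather than delete[], which is undefined behavior (a std::vector<float> would sidestep that entirely). Corrected sketches of the first two on raw pointers (Blob/Context plumbing omitted):

#include <cmath>
#include <cstddef>

// Amin: '<' (not '>') finds the minimum; assumes num > 0.
void AminRef(size_t num, const float *inPtr, size_t *out) {
  size_t minPos = 0;
  float minVal = inPtr[0];
  for (size_t i = 1; i < num; i++) {
    if (inPtr[i] < minVal) {  // the patch compares with '>'
      minVal = inPtr[i];
      minPos = i;
    }
  }
  *out = minPos;
}

// Asum: the accumulated sum must be written back to *out.
void AsumRef(size_t num, const float *inPtr, float *out) {
  float sum = 0.f;
  for (size_t i = 0; i < num; i++) sum += std::fabs(inPtr[i]);
  *out = sum;  // this store is missing in the patch
}

Relatedly, Sign maps negative inputs to 0 rather than -1; that matches the MemberSign test in this commit, but not the usual sign convention suggested by the header comment ret[i] = sign(input[i]).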

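On the random-number tests: MemberBernoulli, MemberUniform and MemberGaussian all validate the generators by comparing the sample mean and the unbiased sample variance (dividing by n - 1) against the theoretical moments: p and p(1-p) = 0.3 * 0.7 = 0.21 for Bernoulli(0.3); (low + high)/2 = 0.15 and (high - low)^2/12 = 0.01/12 for Uniform(0.1, 0.2); 0 and 1 for the standard Gaussian. A small sketch of the estimator the tests compute inline (hypothetical helper, assumes n >= 2):

#include <cstddef>

// Sample mean and unbiased sample variance, as computed inline by the
// MemberBernoulli/MemberUniform/MemberGaussian tests above.
void SampleMoments(const float *x, size_t n, float *mean, float *var) {
  float sum = 0.f;
  for (size_t i = 0; i < n; i++) sum += x[i];
  *mean = sum / n;
  sum = 0.f;
  for (size_t i = 0; i < n; i++) sum += (x[i] - *mean) * (x[i] - *mean);
  *var = sum / (n - 1);  // n - 1 for the unbiased estimator
}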