SINGA-184 Add Cross Entropy loss computation. Update the softmax cross entropy layer to support both cpp and cuda devices;
Fix bugs from crossentropy fwd and bwd; need the cuda version exp(); Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ec17acab Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ec17acab Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ec17acab Branch: refs/heads/master Commit: ec17acab49d595fdc48b2dae6f71901b5a4c8191 Parents: efd7b62 Author: Wei Wang <[email protected]> Authored: Fri May 27 17:25:01 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Mon Jun 13 11:12:05 2016 +0800 ---------------------------------------------------------------------- include/singa/core/tensor.h | 17 +++-- include/singa/model/loss.h | 47 ++++++++++++ src/CMakeLists.txt | 3 +- src/core/tensor/math_kernel.cu | 37 +++++++++- src/core/tensor/math_kernel.h | 9 ++- src/core/tensor/tensor.cc | 52 +++++++++---- src/core/tensor/tensor_math.h | 24 ++++-- src/core/tensor/tensor_math_cpp.h | 50 +++++++++++-- src/core/tensor/tensor_math_cuda.h | 41 ++++++++--- src/model/layer/softmax.cc | 7 +- src/model/loss/cross_entropy.h | 105 --------------------------- src/model/loss/mse.cc | 41 +++++++++++ src/model/loss/mse.h | 66 ----------------- src/model/loss/softmax_cross_entropy.cc | 53 ++++++++++++++ test/singa/test_cross_entropy.cc | 64 ++++++++++++++-- test/singa/test_mse.cc | 6 +- test/singa/test_softmax.cc | 9 +-- 17 files changed, 393 insertions(+), 238 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/include/singa/core/tensor.h ---------------------------------------------------------------------- diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h index bb8d7f8..865e1e4 100644 --- a/include/singa/core/tensor.h +++ b/include/singa/core/tensor.h @@ -239,11 +239,10 @@ Tensor Sum(const Tensor &t, int axis); /// if 'axis' is 1, average all columns into a single column /// TODO(wangwei) support arbitrary Tensor like numpy.average Tensor Average(const Tensor &t, int axis); -/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows, -/// and shape_[axis]*...*shape_[nDim()] columns. -/// and do softmax along each row. -Tensor SoftMax(const Tensor &t, int axis = 0); -void SoftMax(const Tensor &t, int axis, Tensor *ret); +/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor. +Tensor SoftMax(const Tensor &in); +/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor. +void SoftMax(const Tensor &in, Tensor *out); /// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows, /// and shape_[axis+1]*...*shape_[nDim()] columns. @@ -398,6 +397,14 @@ Tensor DivRow(const Tensor &lhs, const Tensor &rhs); void DivRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret); */ +/// Compute the cross entropy loss given the prediction probability 'p' and +/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector +/// or 2-d matrix. 'loss' is 1-d vector. The loss is computed into p. +void ComputeCrossEntropy(const Tensor& t, Tensor* p); +/// Compute the dx, given prediction probability 'p' (p=softmax(x)) and +/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector +/// or 2-d matrix. 'grad' has the same shape as 'p'. dx is computed into p. 
+void SoftmaxCrossEntropyBwd(const Tensor& t, Tensor* p); } // namespace singa #endif // SINGA_CORE_TENSOR_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/include/singa/model/loss.h ---------------------------------------------------------------------- diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h index 6a23067..d188de0 100644 --- a/include/singa/model/loss.h +++ b/include/singa/model/loss.h @@ -18,6 +18,7 @@ #ifndef SINGA_MODEL_LOSS_H_ #define SINGA_MODEL_LOSS_H_ +#include <stack> #include "singa/proto/model.pb.h" #include "singa/core/tensor.h" namespace singa { @@ -54,6 +55,52 @@ class Loss { /// Compute the gradients of the loss values w.r.t. the prediction. virtual Tensor Backward() = 0; }; + + + +// ============= Mean Squared Error =========================================== +/// MSE is for mean squared error or squared euclidean distance. +class MSE : public Loss<Tensor> { + public: + /// Compute the loss values for each sample/instance given the prediction + /// and the target, which is 0.5/||prediction-target||^2 + /// Users can call Average(const Tensor&) to get the average + /// loss value over all samples in the batch. + Tensor Forward(const Tensor& prediction, const Tensor& target) override; + + /// Compute the gradients of the loss values w.r.t. the prediction, + /// which is (prediction-target)/batchsize + Tensor Backward() override; + + private: + // to buffer intermediate data, i.e., prediction-target + std::stack<Tensor> buf_; +}; + + +// ===============Softamx Cross Entropy ======================================= +/// Softmax + cross entropy for multi-category classification +class SoftmaxCrossEntropy : public Loss<Tensor> { + public: + /// Compute the loss values for each sample/instance given the prediction + /// and the target, which is -log(p[idx_truth]), idx_truth is the truth + /// category's index and p[] is the probability for each category, computed + /// from Softmax(prediction). + /// Users can call Average(const Tensor&) to get the average + /// loss value over all samples in the batch. + Tensor Forward(const Tensor& prediction, const Tensor& target) override; + + /// Compute the gradients of the loss values w.r.t. 
the prediction, + /// which is: p[idx] - 1 if idx is the truth category's index; else, + /// p[idx] + Tensor Backward() override; + + private: + // to buffer intermediate data, i.e., probability for each category and + // the target (ground truth) + std::stack<Tensor> buf_; +}; + } // namespace singa #endif // SINGA_MODEL_LOSS_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 28066de..23cae85 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,7 +21,7 @@ AUX_SOURCE_DIRECTORY(core/tensor core_source) FILE(GLOB_RECURSE cuda_source core "*.cu") set(FLAGS_BACKUP ${CMAKE_CXX_FLAGS}) set(CMAKE_CXX_FLAGS "") -CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC") +CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC ") #message(STATUS "FLAGS ${CMAKE_CXX_FLAGS}") #message(STATUS "CORE ${cuda_source}") #message(STATUS "OBJ ${cuda_objs}") @@ -36,6 +36,7 @@ LIST(APPEND SINGA_LINKER_LIBS singa_core) AUX_SOURCE_DIRECTORY(model model_source) AUX_SOURCE_DIRECTORY(model/layer model_source) AUX_SOURCE_DIRECTORY(model/optimizer model_source) +AUX_SOURCE_DIRECTORY(model/loss model_source) #MESSAGE(STATUS "MODEL ${model_source}") ADD_LIBRARY(singa_model SHARED ${model_source}) TARGET_LINK_LIBRARIES(singa_model ${SINGA_LINKER_LIBS}) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/math_kernel.cu ---------------------------------------------------------------------- diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu index aed6add..f12763e 100644 --- a/src/core/tensor/math_kernel.cu +++ b/src/core/tensor/math_kernel.cu @@ -485,8 +485,26 @@ __global__ void KernelSet(const size_t num, const float x, float *out) { } } -void Set(const size_t num, const float x, float *out, cudaStream_t s) { - KernelSet << <ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, x, out); +__global__ +void KernelComputeCrossEntropy(const size_t batchsize, const size_t dim, const float* p, + const int* t, float* loss) { + size_t sample = blockIdx.x * blockDim.x + threadIdx.x; + size_t num_threads = blockDim.x * gridDim.x; + for (; sample < batchsize; sample += num_threads) { + float prob_of_truth = p[sample * dim + t[sample]]; + loss[sample] -= std::log(max(prob_of_truth, FLT_MIN)); + } +} + +__global__ +void KernelSoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim, const float* p, + const int* t, float* grad) { + size_t sample = blockIdx.x * blockDim.x + threadIdx.x; + size_t num_threads = blockDim.x * gridDim.x; + for (; sample < batchsize; sample += num_threads) { + size_t pos = sample * dim + t[sample]; + grad[pos] = p[pos] - 1.0f; // TODO(wangwei) Consider p and grad are diff + } } void Div(const size_t num, float alpha, const float *in, float *out, cudaStream_t s) { @@ -510,6 +528,21 @@ void LE(const size_t num, const float *in, const float x, float *out, KernelLE << <ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out); } +void ComputeCrossEntropy(size_t batchsize, const size_t dim, const float* p, + const int *t, float *loss, cudaStream_t stream) { + KernelComputeCrossEntropy<<<ceil(batchsize/CU1DBLOCKF), CU1DBLOCKF>>>(batchsize, + dim, p, t, loss); +} + +void Set(const size_t num, const float x, float *out, cudaStream_t s) { + KernelSet<<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>>(num, x, out); +} + +void SoftmaxCrossEntropyBwd(size_t batchsize, const 
size_t dim, const float* p, + const int *t, float *grad, cudaStream_t stream) { + KernelSoftmaxCrossEntropyBwd<<<ceil(batchsize/CU1DBLOCKF), CU1DBLOCKF>>>(batchsize, + dim, p, t, grad); +} } // namespace cuda } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/math_kernel.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h index 5c906a9..09953e4 100644 --- a/src/core/tensor/math_kernel.h +++ b/src/core/tensor/math_kernel.h @@ -83,13 +83,20 @@ void set_value(int n, float v, float *out); void threshold(int n, float alpha, const float *in, float *out); // follow the consistency guide for math API +void ComputeCrossEntropy(const size_t batchsize, const size_t dim, + const float *p, const int *t, float *loss, + cudaStream_t stream); void Div(const size_t num, const float x, const float *in, float *out, cudaStream_t s); -void Set(const size_t num, const float x, float *out, cudaStream_t s); void GT(size_t num, const float *in, const float x, float *out, cudaStream_t s); void GE(size_t num, const float *in, const float x, float *out, cudaStream_t s); void LT(size_t num, const float *in, const float x, float *out, cudaStream_t s); void LE(size_t num, const float *in, const float x, float *out, cudaStream_t s); +void Set(const size_t num, const float x, float *out, cudaStream_t s); +void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim, + const float *p, const int *t, float *grad, + cudaStream_t stream); + } // cuda } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor.cc ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc index 5ae375c..1ac25c6 100644 --- a/src/core/tensor/tensor.cc +++ b/src/core/tensor/tensor.cc @@ -77,10 +77,9 @@ void Tensor::ResetLike(const Tensor &t) { } } -void Tensor::Reshape(const Shape &shape) { - if (Product(shape_) != Product(shape)) { - if (blob_ != nullptr && blob_->DecRefCount() == 0) - device_->FreeBlob(blob_); +void Tensor::Reshape(const Shape& shape) { + if (Product(shape) != Product(shape_)) { + if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_); blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_)); } shape_ = shape; @@ -403,22 +402,21 @@ Tensor Average(const Tensor &t, int axis) { } } -Tensor SoftMax(const Tensor &in, int axis) { +Tensor SoftMax(const Tensor &in) { Tensor out(in.shape(), in.device(), in.data_type()); - SoftMax(in, axis, &out); + SoftMax(in, &out); return out; } -void SoftMax(const Tensor &in, int axis, Tensor *out) { +void SoftMax(const Tensor &in, Tensor *out) { + CHECK_LE(in.nDim(), 2u); + Exp(in, out); size_t nrow = 1, ncol = in.Size(), size = ncol; - CHECK_GE(axis, 0); - if (axis > 0) { - nrow = Product(in.shape(), 0, axis); - CHECK_EQ(size % nrow, 0u) << "Size = " << size << " nrow = " << nrow; + if (in.nDim() == 2u) { + nrow = in.shape(0); ncol = size / nrow; + out->Reshape(Shape{nrow, ncol}); } - Exp(in, out); - out->Reshape(Shape{nrow, ncol}); Tensor sum(Shape{nrow}, in.device(), in.data_type()); SumColumns(*out, &sum); DivColumn(sum, out); @@ -594,6 +592,19 @@ void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) { Mult(alpha, one, vmat, beta, M); } } +void ComputeCrossEntropy(const Tensor& t, Tensor* p) { + CHECK_LE(p->nDim(), 2u); + CHECK_LE(t.nDim(), 2u); // TODO(wangwei) 
consider multi-labels. + size_t batchsize = 1; + if (p->nDim() == 2u) batchsize = p->shape(0); + size_t dim = p->Size() / batchsize; + TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, { + p->device()->Exec([batchsize, dim, t, p](Context *ctx) { + ComputeCrossEntropy<DType, Lang>(batchsize, dim, p->blob(), t.blob(), + p->blob(), ctx); + }, {p->blob(), t.blob()}, {p->blob()}); + }); +} template <typename SType> Tensor Div(const SType alpha, const Tensor &in) { Tensor out(in.shape(), in.device(), in.data_type()); @@ -665,7 +676,20 @@ void MultRow(const Tensor &v, Tensor *M) { {M->blob(), v.blob()}, {M->blob()}); }); } - +void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p) { + CHECK_LE(p->nDim(), 2u); + CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels. + size_t batchsize = 1; + if (p->nDim() == 2u) + batchsize = p->shape(0); + size_t dim = p->Size() / batchsize; + TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, { + p->device()->Exec([batchsize, dim, t, p](Context *ctx) { + SoftmaxCrossEntropyBwd<DType, Lang>(batchsize, dim, p->blob(), t.blob(), + p->blob(), ctx); + }, {p->blob(), t.blob()}, {p->blob()}); + }); +} void SubColumn(const Tensor &v, Tensor *M) { AddColumn(-1, 1, v, M); } void SubRow(const Tensor &v, Tensor *M) { AddRow(-1, 1, v, M); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h index ff865e0..bcf4908 100644 --- a/src/core/tensor/tensor_math.h +++ b/src/core/tensor/tensor_math.h @@ -110,12 +110,6 @@ void Sigmoid(int count, const Blob *input, Blob *ret, Context *ctx) { LOG(FATAL) << "Not Implemented"; } -/// Do softmax for each row invidually -template <typename DType, typename Lang> -void Softmax(int nrow, int ncol, const Blob *input, Blob *ret, Context *ctx) { - LOG(FATAL) << "Not Implemented"; -} - // TODO(wangwei) unify SumRow and SumCol. /// Sum the rows of the input matrix into a vector template <typename DType, typename Lang> @@ -312,11 +306,14 @@ void Gaussian(int count, float mean, float std, Blob *ret, Context *ctx) { // ========follow the consistency guide of math API +/// Divide alpha by each element of 'in'. +// following the consistency guide. template <typename DType, typename Lang> -void Set(const size_t num, const DType x, Blob *out, Context *ctx) { +void ComputeCrossEntropy(const size_t batchsize, const size_t dim, + const Blob *p, const Blob *t, Blob *loss, + Context *ctx) { LOG(FATAL) << "Not Implemented"; } -/// Divide alpha by each element of 'in'. 
template <typename DType, typename Lang> void Div(const size_t num, const DType alpha, const Blob *in, Blob *out, Context *ctx) { @@ -364,6 +361,17 @@ void GE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) { LOG(FATAL) << "Not Implemented"; } +template <typename DType, typename Lang> +void Set(const size_t num, const DType x, Blob *out, Context *ctx) { + LOG(FATAL) << "Not Implemented"; +} + +template <typename DType, typename Lang> +void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim, + const Blob *p, const Blob *t, Blob *grad, + Context *ctx) { + LOG(FATAL) << "Not Implemented"; +} } // namespace singa #endif // SINGA_CORE_MATH_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math_cpp.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h index 693f09c..907c656 100644 --- a/src/core/tensor/tensor_math_cpp.h +++ b/src/core/tensor/tensor_math_cpp.h @@ -17,7 +17,9 @@ */ #ifndef SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_ #define SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_ + #include "./tensor_math.h" +#include <cfloat> #include "singa/core/common.h" #include <math.h> @@ -210,6 +212,22 @@ void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob *ret, // follow the consistency guide of math API template <> +void ComputeCrossEntropy<float, lang::Cpp>(const size_t batchsize, + const size_t dim, const Blob *p, + const Blob *t, Blob *loss, + Context *ctx) { + const float *pPtr = static_cast<const float *>(p->data()); + const float *tPtr = static_cast<const float *>(t->data()); + float *lossPtr = static_cast<float *>(loss->mutable_data()); + for (size_t i = 0; i < batchsize; i++) { + int truth_idx = static_cast<int>(tPtr[i]); + CHECK_GE(truth_idx, 0); + float prob_of_truth = pPtr[i * dim + truth_idx]; + lossPtr[i] = -std::log(std::max(prob_of_truth, FLT_MIN)); + } +} + +template <> void Div<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in, Blob *out, Context *ctx) { float *outPtr = static_cast<float *>(out->mutable_data()); @@ -249,13 +267,6 @@ void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow, } } } - -template <> -void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out, - Context *ctx) { - float *outPtr = static_cast<float *>(out->mutable_data()); - for (size_t i = 0; i < num; i++) outPtr[i] = x; -} template <> void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x, Blob *out, Context *ctx) { @@ -312,9 +323,32 @@ void GEMM<float, lang::Cpp>(const bool transA, const bool transB, cblas_sgemm(CblasRowMajor, transa, transb, nrowA, ncolB, ncolA, alpha, APtr, lda, BPtr, ldb, beta, CPtr, ldc); } - #endif // USE_CBLAS +template <> +void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out, + Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + for (size_t i = 0; i < num; i++) outPtr[i] = x; +} +template <> +void SoftmaxCrossEntropyBwd<float, lang::Cpp>(const size_t batchsize, + const size_t dim, const Blob *p, + const Blob *t, + Blob *grad, Context *ctx) { + CHECK_EQ(p, grad) << "Use the same pointer to optimize performance"; + // const float* pPtr = static_cast<const float*>(p->data()); + const float *tPtr = static_cast<const float *>(t->data()); + float *gradPtr = static_cast<float *>(grad->mutable_data()); + + for (size_t i = 0; i < batchsize; i++) { + int truth_idx = static_cast<int>(tPtr[i]); + 
CHECK_GE(truth_idx, 0); + gradPtr[i * dim + truth_idx] -= 1.0; + } +} + + } // namespace singa #endif // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math_cuda.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h index 4a2ba66..c69620c 100644 --- a/src/core/tensor/tensor_math_cuda.h +++ b/src/core/tensor/tensor_math_cuda.h @@ -75,6 +75,17 @@ void Sum<float, lang::Cuda>(int count, const Blob *input, float *ret, // follow the consistency guide of math API template <> +void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize, + const size_t dim, const Blob *p, + const Blob *t, Blob *loss, + Context *ctx) { + const float *pPtr = static_cast<const float *>(p->data()); + const int *tPtr = static_cast<const int *>(t->data()); + float *lossPtr = static_cast<float *>(loss->mutable_data()); + cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream); +} + +template <> void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in, Blob *out, Context *ctx) { float *outPtr = static_cast<float *>(out->mutable_data()); @@ -82,19 +93,13 @@ void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in, cuda::Div(num, alpha, inPtr, outPtr, ctx->stream); } -template <> -void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out, - Context *ctx) { - float *outPtr = static_cast<float *>(out->mutable_data()); - cuda::Set(num, x, outPtr, ctx->stream); -} // NOTE: cublas uses column major order. // http://peterwittek.com/cublas-matrix-c-style.html template <> void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow, const size_t ncol, const Blob *M, const Blob *v, Blob *out, Context *ctx) { - auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream + auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream const float *MPtr = static_cast<const float *>(M->data()); const float *vPtr = static_cast<const float *>(v->data()); float *outPtr = static_cast<float *>(out->mutable_data()); @@ -121,7 +126,7 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB, const float *APtr = static_cast<const float *>(A->data()); const float *BPtr = static_cast<const float *>(B->data()); float *CPtr = static_cast<float *>(C->mutable_data()); - auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream + auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream CUBLAS_CHECK(cublasSgemm(handle, transb, transa, ncolB, nrowA, ncolA, &alpha, BPtr, ldb, APtr, lda, &beta, CPtr, ldc)); } @@ -155,9 +160,25 @@ void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x, cuda::LT(num, inPtr, x, outPtr, ctx->stream); } +template<> +void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out, + Context *ctx) { + float *outPtr = static_cast<float *>(out->mutable_data()); + cuda::Set(num, x, outPtr, ctx->stream); +} - - +template <> +void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize, + const size_t dim, const Blob *p, + const Blob *t, Blob *grad, + Context *ctx) { + CHECK_EQ(p, grad) << "Use the same pointer to optimize performance"; + const float *pPtr = static_cast<const float *>(p->data()); + const int *tPtr = static_cast<const int *>(t->data()); + float *gradPtr = static_cast<float *>(grad->mutable_data()); + cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr, + 
ctx->stream); +} } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/layer/softmax.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc index 813ebf0..8af1d76 100644 --- a/src/model/layer/softmax.cc +++ b/src/model/layer/softmax.cc @@ -26,10 +26,11 @@ void Softmax::Setup(const LayerConf& conf) { const Tensor Softmax::Forward(int flag, const Tensor& input) { if (input.nDim() == 1) { - Tensor tmp = Reshape(input, Shape{1, input.Size()}); - buf_.push(SoftMax(tmp, 0)); + buf_.push(SoftMax(input)); } else { - buf_.push(SoftMax(input, axis_)); + size_t nrow = Product(input.shape(), 0, axis_); + const Tensor& tmp = Reshape(input, Shape{nrow, input.Size() / nrow}); + buf_.push(SoftMax(tmp)); } return buf_.top(); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/cross_entropy.h ---------------------------------------------------------------------- diff --git a/src/model/loss/cross_entropy.h b/src/model/loss/cross_entropy.h deleted file mode 100644 index 815b795..0000000 --- a/src/model/loss/cross_entropy.h +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SRC_MODEL_LOSS_CROSS_ENTROPY_H_ -#define SRC_MODEL_LOSS_CROSS_ENTROPY_H_ -#include <stack> -#include "singa/model/loss.h" - -namespace singa { - -/// Cross entropy is for cross entropy loss. -class CrossEntropy : public Loss<Tensor> { - public: - /// Compute the loss values for each sample/instance given the prediction - /// and the target, which is sum {-log(prob_of_truth)} - /// Users can call Average(const Tensor&) to get the average - /// loss value over all samples in the batch. - Tensor Forward(const Tensor& prediction, const Tensor& target) override; - - /// Compute the gradients of the loss values w.r.t. the prediction, - /// which is: if the entry x corresponds to ground truth, - /// then softmax(x) - 1; else, softmax(x) - Tensor Backward() override; - - private: - // to buffer intermediate data, i.e., softmax(prediction), target - std::stack<Tensor> buf_; -}; - -Tensor CrossEntropy::Forward(const Tensor& prediction, const Tensor& target) { - CHECK(buf_.empty()) << "Do not call Forward successively for more than twice." 
- << " The calling pattern is [Forward|Evaluate] Backward"; - - size_t batchsize = 1; - if (prediction.nDim() > 1) batchsize = prediction.shape().at(0); - size_t dim = prediction.Size() / batchsize; - // a temporal Softmax layer for forward computation -// LayerConf conf; // TODO(kaiping): this is currently commented -// Softmax softmax_tmp; -// softmax_tmp.Setup(conf); -// Tensor softmax = softmax_tmp.Forward(0, prediction); - - Tensor softmax(Shape{batchsize, dim}); // TODO(kaiping): Delete -// softmax.SetValue<float>(0.5f); // TODO(kaiping): Delete - - softmax.Reshape(Shape{batchsize, dim}); - // buffer intermediate data - buf_.push(softmax); - buf_.push(target); - - // Compute loss for each sample - Tensor loss(Shape{batchsize, 1}); - float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data()); - float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data()); - float * loss_ptr = reinterpret_cast<float*>(loss.blob()->mutable_data()); - for (size_t i = 0; i < batchsize; i++) { - int ilabel = static_cast<int>(truth_ptr[i]); - CHECK_GE(ilabel, 0); - float prob_of_truth = pre_ptr[ilabel]; - loss_ptr[i] = -log(prob_of_truth); - pre_ptr += dim; // change to the next sample - } - return loss; -} - -Tensor CrossEntropy::Backward() { - const Tensor& target = buf_.top(); - buf_.pop(); - Tensor softmax = buf_.top(); - buf_.pop(); - - size_t batchsize = 1; - if (softmax.nDim() > 1) - batchsize = softmax.shape().at(0); - size_t dim = softmax.Size() / batchsize; - float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data()); - float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data()); - for (size_t i = 0; i < batchsize; i++) { - int ilabel = static_cast<int>(truth_ptr[i]); - // CHECK_GE(ilabel, 0); - pre_ptr[ilabel] -= 1.0; - pre_ptr += dim; // change to the next sample - } - return softmax; -} -} // namespace singa - -#endif // SRC_MODEL_LOSS_CROSS_ENTROPY_H_ - - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/mse.cc ---------------------------------------------------------------------- diff --git a/src/model/loss/mse.cc b/src/model/loss/mse.cc new file mode 100644 index 0000000..a4bbb72 --- /dev/null +++ b/src/model/loss/mse.cc @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "singa/model/loss.h" + +namespace singa { + +Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) { + CHECK(buf_.empty()) << "Do not call Forward successively for more than twice." 
+ << " The calling pattern is [Forward|Evaluate] Backward"; + Tensor t = prediction - target; + size_t batchsize = 1; + if (t.nDim() > 1) batchsize = t.shape().at(0); + size_t dim = t.Size() / batchsize; + t.Reshape(Shape{batchsize, dim}); + buf_.push(t); + // TODO(wangwei) use CastType for operator/ + return Sum(Square(t), 1) * 0.5f; +} + +Tensor MSE::Backward() { + Tensor ret = buf_.top(); + buf_.pop(); + return ret * (1.0f / ret.shape().at(0)); +} +} // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/mse.h ---------------------------------------------------------------------- diff --git a/src/model/loss/mse.h b/src/model/loss/mse.h deleted file mode 100644 index 1a022f9..0000000 --- a/src/model/loss/mse.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SINGA_MODEL_LOSS_MSE_H_ -#define SINGA_MODEL_LOSS_MSE_H_ -#include <stack> -#include "singa/model/loss.h" - -namespace singa { - -/// MSE is for mean squared error or squared euclidean distance. -class MSE : public Loss<Tensor> { - public: - /// Compute the loss values for each sample/instance given the prediction - /// and the target, which is 0.5/||prediction-target||^2 - /// Users can call Average(const Tensor&) to get the average - /// loss value over all samples in the batch. - Tensor Forward(const Tensor& prediction, const Tensor& target) override; - - /// Compute the gradients of the loss values w.r.t. the prediction, - /// which is (prediction-target)/batchsize - Tensor Backward() override; - - private: - // to buffer intermediate data, i.e., prediction-target - std::stack<Tensor> buf_; -}; - -Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) { - CHECK(buf_.empty()) << "Do not call Forward successively for more than twice." 
- << " The calling pattern is [Forward|Evaluate] Backward"; - Tensor t = prediction - target; - size_t batchsize = 1; - if (t.nDim() > 1) batchsize = t.shape().at(0); - size_t dim = t.Size() / batchsize; - t.Reshape(Shape{batchsize, dim}); - buf_.push(t); - // TODO(wangwei) use CastType for operator/ - return Sum(Square(t), 1) * 0.5f; -} - -Tensor MSE::Backward() { - Tensor ret = buf_.top(); - buf_.pop(); - return ret * (1.0f / ret.shape().at(0)); -} -} // namespace singa - -#endif // SINGA_MODEL_LOSS_H_ - - http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/softmax_cross_entropy.cc ---------------------------------------------------------------------- diff --git a/src/model/loss/softmax_cross_entropy.cc b/src/model/loss/softmax_cross_entropy.cc new file mode 100644 index 0000000..4ca323a --- /dev/null +++ b/src/model/loss/softmax_cross_entropy.cc @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stack> +#include "singa/model/loss.h" + +namespace singa { + + +Tensor SoftmaxCrossEntropy::Forward(const Tensor& prediction, const Tensor& target) { + CHECK(buf_.empty()) << "Do not call Forward successively for more than twice." 
+ << " The calling pattern is [Forward|Evaluate] Backward"; + size_t batchsize = 1; + if (prediction.nDim() > 1) batchsize = prediction.shape().at(0); + size_t dim = prediction.Size() / batchsize; + const Tensor& input = Reshape(prediction, Shape{batchsize, dim}); + Tensor prob = SoftMax(input); + + // buffer intermediate data + buf_.push(prob); + buf_.push(target); + Tensor loss = prob.Clone(); + + ComputeCrossEntropy(target, &loss); + return loss; +} + +Tensor SoftmaxCrossEntropy::Backward() { + const Tensor target = buf_.top(); + buf_.pop(); + Tensor prob = buf_.top(); + buf_.pop(); + SoftmaxCrossEntropyBwd(target, &prob); + return prob; +} +} // namespace singa + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_cross_entropy.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc index 9bb2321..6b8cb69 100644 --- a/test/singa/test_cross_entropy.cc +++ b/test/singa/test_cross_entropy.cc @@ -22,16 +22,15 @@ #include "gtest/gtest.h" #include "singa/core/tensor.h" #include "singa/core/device.h" -#include "../src/model/loss/cross_entropy.h" +#include "singa/model/loss.h" +#include "singa_config.h" using singa::Tensor; -class TestCrossEntropy : public ::testing::Test { +class TestSoftmaxCrossEntropy : public ::testing::Test { protected: virtual void SetUp() { p.Reshape(singa::Shape{2, 4}); t.Reshape(singa::Shape{2, 1}); - p.CopyDataFromHostPtr(pdat, sizeof(pdat) / sizeof(float)); - t.CopyDataFromHostPtr(tdat, sizeof(pdat) / sizeof(float)); } const float pdat[8] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; const float tdat[2] = {0.0, 2.0}; @@ -39,8 +38,11 @@ class TestCrossEntropy : public ::testing::Test { singa::Tensor p, t; }; -TEST_F(TestCrossEntropy, CppForward) { - singa::CrossEntropy cross_entropy; +TEST_F(TestSoftmaxCrossEntropy, CppForward) { + p.CopyDataFromHostPtr(pdat, 8); + t.CopyDataFromHostPtr(tdat, 2); + + singa::SoftmaxCrossEntropy cross_entropy; const Tensor& loss = cross_entropy.Forward(p, t); auto ldat = loss.data<const float*>(); @@ -49,8 +51,11 @@ TEST_F(TestCrossEntropy, CppForward) { EXPECT_FLOAT_EQ(ldat[1], result_test); } -TEST_F(TestCrossEntropy, CppBackward) { - singa::CrossEntropy cross_entropy; +TEST_F(TestSoftmaxCrossEntropy, CppBackward) { + p.CopyDataFromHostPtr(pdat, 8); + t.CopyDataFromHostPtr(tdat, 2); + + singa::SoftmaxCrossEntropy cross_entropy; cross_entropy.Forward(p, t); const Tensor& grad = cross_entropy.Backward(); @@ -64,3 +69,46 @@ TEST_F(TestCrossEntropy, CppBackward) { EXPECT_FLOAT_EQ(gdat[6], -0.75); EXPECT_FLOAT_EQ(gdat[7], 0.25); } + +#ifdef USE_CUDA + +TEST_F(TestSoftmaxCrossEntropy, CudaForward) { + singa::SoftmaxCrossEntropy cross_entropy; + singa::CudaGPU dev; + p.ToDevice(&dev); + t.ToDevice(&dev); + p.CopyDataFromHostPtr(pdat, 8); + t.CopyDataFromHostPtr(tdat, 2); + + Tensor loss = cross_entropy.Forward(p, t); + loss.ToHost(); + auto ldat = loss.data<const float*>(); + + const float result_test = -log(0.25); + EXPECT_FLOAT_EQ(ldat[0], result_test); + EXPECT_FLOAT_EQ(ldat[1], result_test); +} + +TEST_F(TestSoftmaxCrossEntropy, CudaBackward) { + singa::SoftmaxCrossEntropy cross_entropy; + singa::CudaGPU dev; + p.ToDevice(&dev); + t.ToDevice(&dev); + p.CopyDataFromHostPtr(pdat, 8); + t.CopyDataFromHostPtr(tdat, 2); + + cross_entropy.Forward(p, t); + Tensor grad = cross_entropy.Backward(); + + grad.ToHost(); + auto gdat = grad.data<const float*>(); + EXPECT_FLOAT_EQ(gdat[0], -0.75); + 
EXPECT_FLOAT_EQ(gdat[1], 0.25); + EXPECT_FLOAT_EQ(gdat[2], 0.25); + EXPECT_FLOAT_EQ(gdat[3], 0.25); + EXPECT_FLOAT_EQ(gdat[4], 0.25); + EXPECT_FLOAT_EQ(gdat[5], 0.25); + EXPECT_FLOAT_EQ(gdat[6], -0.75); + EXPECT_FLOAT_EQ(gdat[7], 0.25); +} +#endif // USE_CUDA http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_mse.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc index 67f583c..a6bd1c3 100644 --- a/test/singa/test_mse.cc +++ b/test/singa/test_mse.cc @@ -22,8 +22,9 @@ #include "gtest/gtest.h" #include "singa/core/tensor.h" #include "singa/core/device.h" -#include "../src/model/loss/mse.h" +#include "singa/model/loss.h" #include "singa_config.h" + using singa::Tensor; class TestMSE : public ::testing::Test { protected: @@ -66,6 +67,8 @@ TEST_F(TestMSE, CppBackward) { EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i])); } #endif + +#ifdef USE_CUDA TEST_F(TestMSE, CudaForward) { singa::MSE mse; singa::CudaGPU dev; @@ -98,3 +101,4 @@ TEST_F(TestMSE, CudaBackward) { for (size_t i = 0; i < grad.Size(); i++) EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i])); } +#endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_softmax.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_softmax.cc b/test/singa/test_softmax.cc index da2a6ef..09dfcd9 100644 --- a/test/singa/test_softmax.cc +++ b/test/singa/test_softmax.cc @@ -55,7 +55,6 @@ TEST(Softmax, Forward) { const float* yptr = out.data<const float*>(); EXPECT_EQ(n, out.Size()); - float* y = new float[n]; float* sigma = new float[row]; for (size_t i = 0; i < row; i++) sigma[i] = 0.f; @@ -63,11 +62,9 @@ TEST(Softmax, Forward) { sigma[i / col] += exp(x[i]); //EXPECT_EQ(0, sigma[1]); for (size_t i = 0; i < row; i++) - for (size_t j = 0; j < col; j++) - y[i * col + j] = exp(x[i * col + j]) / sigma[i]; - EXPECT_FLOAT_EQ(y[0], yptr[0]); - EXPECT_FLOAT_EQ(y[4], yptr[4]); - EXPECT_FLOAT_EQ(y[5], yptr[5]); + for (size_t j = 0; j < col; j++) { + EXPECT_FLOAT_EQ(yptr[i * col + j], exp(x[i * col + j]) / sigma[i]); + } } TEST(Softmax, Backward) {
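----------------------------------------------------------------------
Usage sketch (not part of the commit): the calling pattern that the CHECK(buf_.empty()) in Forward enforces is [Forward|Evaluate] Backward. The snippet below condenses test/singa/test_cross_entropy.cc; the function name ExampleSoftmaxCrossEntropy is hypothetical, the shapes and values come from the test fixture, and error handling is omitted.

#include "singa/core/tensor.h"
#include "singa/model/loss.h"

void ExampleSoftmaxCrossEntropy() {
  singa::Tensor p(singa::Shape{2, 4});  // predictions (pre-softmax scores)
  singa::Tensor t(singa::Shape{2, 1});  // one ground-truth index per sample
  const float pdat[8] = {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f};
  const float tdat[2] = {0.0f, 2.0f};
  p.CopyDataFromHostPtr(pdat, 8);
  t.CopyDataFromHostPtr(tdat, 2);

  singa::SoftmaxCrossEntropy cross_entropy;
  // Per-sample loss: -log(softmax(p)[truth]). With uniform scores every
  // probability is 0.25, so both loss entries equal -log(0.25).
  singa::Tensor loss = cross_entropy.Forward(p, t);
  // Gradient w.r.t. p: softmax(p), minus 1 at each truth index.
  singa::Tensor grad = cross_entropy.Backward();
}

To run the same code on a GPU, the tests first move p and t with ToDevice(&dev) for a singa::CudaGPU dev, then read results back after loss.ToHost() via loss.data<const float*>().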
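----------------------------------------------------------------------
For reference, the identities the new kernels implement are loss[i] = -log(max(p[i][t[i]], FLT_MIN)) for the forward pass and dx = p - 1_truth for the fused backward pass. A self-contained sketch in plain C++ follows (the *Ref helper names are hypothetical, independent of the Blob/Context types in the diff):

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>

// Cross entropy per sample for row-major probabilities p (batchsize x dim)
// and integer truth indices t; clamping with FLT_MIN avoids log(0), as the
// committed cpp and cuda kernels do.
void ComputeCrossEntropyRef(size_t batchsize, size_t dim, const float* p,
                            const int* t, float* loss) {
  for (size_t i = 0; i < batchsize; i++)
    loss[i] = -std::log(std::max(p[i * dim + t[i]], FLT_MIN));
}

// Fused softmax + cross entropy gradient: grad equals p everywhere except
// that grad[i][t[i]] is reduced by 1. The commit computes this in place
// (p aliases grad), hence the CHECK_EQ(p, grad) in both backends; copying
// p into grad first keeps this reference version self-contained.
void SoftmaxCrossEntropyBwdRef(size_t batchsize, size_t dim, const float* p,
                               const int* t, float* grad) {
  for (size_t i = 0; i < batchsize; i++) {
    for (size_t j = 0; j < dim; j++) grad[i * dim + j] = p[i * dim + j];
    grad[i * dim + t[i]] -= 1.0f;
  }
}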
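----------------------------------------------------------------------
The simplified SoftMax(in, out) in tensor.cc composes Exp, SumColumns and DivColumn, i.e. per row out[i][j] = exp(in[i][j]) / sum_k exp(in[i][k]), which is exactly what test/singa/test_softmax.cc now verifies element by element. A hypothetical host-side reference of that computation:

#include <cmath>
#include <cstddef>
#include <vector>

// Row-wise softmax over an nrow x ncol matrix in row-major order. Like the
// committed exp-then-normalize sequence, it does not subtract the row max,
// so exp() may overflow for inputs with large magnitude.
std::vector<float> SoftmaxRef(const std::vector<float>& in, size_t nrow,
                              size_t ncol) {
  std::vector<float> out(in.size());
  for (size_t i = 0; i < nrow; i++) {
    float sum = 0.f;
    for (size_t j = 0; j < ncol; j++) {
      out[i * ncol + j] = std::exp(in[i * ncol + j]);
      sum += out[i * ncol + j];
    }
    for (size_t j = 0; j < ncol; j++) out[i * ncol + j] /= sum;
  }
  return out;
}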
