Repository: incubator-singa
Updated Branches:
  refs/heads/dev 74f02143a -> b91002b55

SINGA-180 Add Activation layer and Softmax layer

Fix a bug in cudnn softmax and let softmax support 1D or 2D tensor as input.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b91002b5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b91002b5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b91002b5

Branch: refs/heads/dev
Commit: b91002b551781503507f0a15acfcd6e12279b765
Parents: 74f0214
Author: jixin <[email protected]>
Authored: Thu Jun 16 22:55:02 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Fri Jun 17 15:51:46 2016 +0800

----------------------------------------------------------------------
 include/singa/model/layer.h         |   2 +-
 src/model/layer/activation.cc       |  22 +++----
 src/model/layer/activation.h        |   2 +-
 src/model/layer/batchnorm.h         |   2 +-
 src/model/layer/convolution.h       |   2 +-
 src/model/layer/cudnn_activation.cc |   7 +--
 src/model/layer/cudnn_activation.h  |  11 ++--
 src/model/layer/cudnn_batchnorm.h   |  36 +++++------
 src/model/layer/cudnn_dropout.h     |   1 +
 src/model/layer/cudnn_lrn.h         |  32 +++++-----
 src/model/layer/cudnn_pooling.h     |   2 +-
 src/model/layer/cudnn_softmax.cc    |  40 +++++++++---
 src/model/layer/cudnn_softmax.h     |  11 +++-
 src/model/layer/dense.h             |   2 +-
 src/model/layer/dropout.h           |   2 +-
 src/model/layer/flatten.h           |   2 +-
 src/model/layer/lrn.h               |   2 +-
 src/model/layer/pooling.h           |   2 +-
 src/model/layer/prelu.h             |   2 +-
 src/model/layer/softmax.cc          |  23 +++----
 src/model/layer/softmax.h           |   6 +-
 src/proto/model.proto               |   4 ++
 test/singa/test_cudnn_activation.cc |   5 +-
 test/singa/test_cudnn_softmax.cc    | 105 +++++++++++++++++++++++++------
 24 files changed, 202 insertions(+), 123 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index 5f5c197..a505f15 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -79,7 +79,7 @@ class Layer {
   }
 
   /// Return the shape of the generated Tensor without the batchsize dimension
-  virtual const Shape GetOutputSampleShape() {
+  virtual const Shape GetOutputSampleShape() const {
     LOG(FATAL) << "Pls override this function";
     return vector<size_t>{};
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.cc b/src/model/layer/activation.cc
index 2f76a6d..b00834f 100644
--- a/src/model/layer/activation.cc
+++ b/src/model/layer/activation.cc
@@ -33,19 +33,15 @@ const Tensor Activation::Forward(int flag, const Tensor& input) {
   Tensor output;
   if (mode_ == "SIGMOID") {
     output = Sigmoid(input);
-    if (flag & kTrain)
-      buf_.push(output);
+    if (flag & kTrain) buf_.push(output);
   } else if (mode_ == "TANH") {
     output = Tanh(input);
-    if (flag & kTrain)
-      buf_.push(output);
+    if (flag & kTrain) buf_.push(output);
   } else if (mode_ == "RELU") {
     output = ReLU(input);
-    if (flag & kTrain)
-      buf_.push(input);
-  } else {
+    if (flag & kTrain) buf_.push(input);
+  } else
     LOG(FATAL) << "Unkown activation: " << mode_;
-  }
   return output;
 }
 
@@ -57,15 +53,13 @@ const std::pair<Tensor, vector<Tensor>> Activation::Backward(
   // activation.
Tensor input_grad, inout = buf_.top(); buf_.pop(); - if (mode_ == "SIGMOID") { + if (mode_ == "SIGMOID") input_grad = grad * inout * (inout * (-1.f) + 1.f); - } else if (mode_ == "TANH") { + else if (mode_ == "TANH") input_grad = grad * (inout * inout * (-1.f) + 1.f); - } else if (mode_ == "RELU") { + else if (mode_ == "RELU") input_grad = grad * (inout > 0.f) + (inout <= 0.f) * neg_slope_; - } else { - LOG(FATAL) << "Unkown activation: " << mode_; - } + else LOG(FATAL) << "Unkown activation: " << mode_; return std::make_pair(input_grad, param_grad); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/activation.h ---------------------------------------------------------------------- diff --git a/src/model/layer/activation.h b/src/model/layer/activation.h index 1799514..db3a8f5 100644 --- a/src/model/layer/activation.h +++ b/src/model/layer/activation.h @@ -30,7 +30,7 @@ class Activation : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/batchnorm.h ---------------------------------------------------------------------- diff --git a/src/model/layer/batchnorm.h b/src/model/layer/batchnorm.h index 433e0c7..35b05b1 100644 --- a/src/model/layer/batchnorm.h +++ b/src/model/layer/batchnorm.h @@ -35,7 +35,7 @@ class BatchNorm : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/convolution.h ---------------------------------------------------------------------- diff --git a/src/model/layer/convolution.h b/src/model/layer/convolution.h index 3901049..7ea5712 100644 --- a/src/model/layer/convolution.h +++ b/src/model/layer/convolution.h @@ -31,7 +31,7 @@ class Convolution : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const vector<size_t>& in_shape, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_activation.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc index 98a5758..72352b8 100644 --- a/src/model/layer/cudnn_activation.cc +++ b/src/model/layer/cudnn_activation.cc @@ -48,9 +48,8 @@ void CudnnActivation::InitCudnn(size_t size, DataType dtype) { else LOG(FATAL) << "Unkown activation: " << mode_; - nan_opt_ = CUDNN_PROPAGATE_NAN; - CUDNN_CHECK( - cudnnSetActivationDescriptor(acti_desc_, cudnn_mode_, nan_opt_, 0.0f)); + CUDNN_CHECK(cudnnSetActivationDescriptor( + acti_desc_, cudnn_mode_, CUDNN_PROPAGATE_NAN, 0.0f)); has_init_cudnn_ = true; } @@ -89,7 +88,7 @@ const Tensor CudnnActivation::Forward(int flag, const Tensor& input) { const std::pair<Tensor, vector<Tensor>> 
CudnnActivation::Backward( int flag, const Tensor& grad) { vector<Tensor> param_grad; - Tensor dx; // inout = buf_.top(); + Tensor dx; CHECK(!buf_.empty()); // inout means either used as input or output, only one is valid for one type // of activation http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_activation.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_activation.h b/src/model/layer/cudnn_activation.h index b572db7..71bede5 100644 --- a/src/model/layer/cudnn_activation.h +++ b/src/model/layer/cudnn_activation.h @@ -41,16 +41,17 @@ class CudnnActivation : public Activation { const std::pair<Tensor, vector<Tensor>> Backward(int flag, const Tensor& grad) override; - /// Init cudnn related data structures. - void InitCudnn(size_t size, DataType dtype); const cudnnActivationMode_t CudnnMode() const { return cudnn_mode_; } private: + /// Init cudnn related data structures. + void InitCudnn(size_t size, DataType dtype); + + private: bool has_init_cudnn_ = false; - cudnnActivationDescriptor_t acti_desc_; - cudnnTensorDescriptor_t desc_; - cudnnNanPropagation_t nan_opt_; + cudnnActivationDescriptor_t acti_desc_ = nullptr; + cudnnTensorDescriptor_t desc_ = nullptr; cudnnActivationMode_t cudnn_mode_; }; } // namespace http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_batchnorm.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_batchnorm.h b/src/model/layer/cudnn_batchnorm.h index 47fd4c5..36dbbce 100644 --- a/src/model/layer/cudnn_batchnorm.h +++ b/src/model/layer/cudnn_batchnorm.h @@ -29,31 +29,29 @@ namespace singa { class CudnnBatchNorm : public BatchNorm { public: - ~CudnnBatchNorm(); - /// \copy doc Layer::layer_type() - const std::string layer_type() const override { - return "CudnnBatchNorm"; - } + ~CudnnBatchNorm(); + /// \copy doc Layer::layer_type() + const std::string layer_type() const override { return "CudnnBatchNorm"; } - void Setup(const Shape& in_sample, const LayerConf& conf) override; + void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Tensor Forward(int flag, const Tensor& input) - override; - const std::pair<Tensor, vector<Tensor>> Backward( - int flag, const Tensor& grad) override; + const Tensor Forward(int flag, const Tensor& input) override; + const std::pair<Tensor, vector<Tensor>> Backward(int flag, + const Tensor& grad) override; + void ToDevice(Device* device) override; - /// Init cudnn related data structures. - void InitCudnn(const Shape& shape, DataType dtype); - void ToDevice(Device* device) override; + private: + /// Init cudnn related data structures. 
+ void InitCudnn(const Shape& shape, DataType dtype); private: - bool has_init_cudnn_ = false; - cudnnBatchNormMode_t mode_; - cudnnLRNDescriptor_t lrn_desc_; - cudnnTensorDescriptor_t shape_desc_, param_desc_; - Tensor resultSaveMean_, resultSaveVariance_; + bool has_init_cudnn_ = false; + cudnnBatchNormMode_t mode_; + cudnnLRNDescriptor_t lrn_desc_ = nullptr; + cudnnTensorDescriptor_t shape_desc_ = nullptr, param_desc_ = nullptr; + Tensor resultSaveMean_, resultSaveVariance_; -}; // class CudnnBatchNorm +}; // class CudnnBatchNorm } // namespace #endif // USE_CUDNN http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_dropout.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_dropout.h b/src/model/layer/cudnn_dropout.h index 7cb185b..83572cf 100644 --- a/src/model/layer/cudnn_dropout.h +++ b/src/model/layer/cudnn_dropout.h @@ -42,6 +42,7 @@ class CudnnDropout : public Dropout { const std::pair<Tensor, vector<Tensor>> Backward(int flag, const Tensor& grad) override; + private: /// Init cudnn related data structures. void InitCudnn(int size, DataType dtype, Device* dev, Context* ctx); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_lrn.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_lrn.h b/src/model/layer/cudnn_lrn.h index 0f650fe..ddf4a37 100644 --- a/src/model/layer/cudnn_lrn.h +++ b/src/model/layer/cudnn_lrn.h @@ -29,27 +29,25 @@ namespace singa { class CudnnLRN : public LRN { public: - ~CudnnLRN(); - /// \copy doc Layer::layer_type() - const std::string layer_type() const override { - return "CudnnLRN"; - } + ~CudnnLRN(); + /// \copy doc Layer::layer_type() + const std::string layer_type() const override { return "CudnnLRN"; } - const Tensor Forward(int flag, const Tensor& input) - override; - const std::pair<Tensor, vector<Tensor>> Backward( - int flag, const Tensor& grad) override; + const Tensor Forward(int flag, const Tensor& input) override; + const std::pair<Tensor, vector<Tensor>> Backward(int flag, + const Tensor& grad) override; - /// Init cudnn related data structures. - void InitCudnn(const Shape& shape, DataType dtype); + private: + /// Init cudnn related data structures. + void InitCudnn(const Shape& shape, DataType dtype); private: - bool has_init_cudnn_ = false; - cudnnLRNMode_t mode_; - cudnnLRNDescriptor_t lrn_desc_; - cudnnTensorDescriptor_t shape_desc_; - -}; // class CudnnLRN + bool has_init_cudnn_ = false; + cudnnLRNMode_t mode_; + cudnnLRNDescriptor_t lrn_desc_ = nullptr; + cudnnTensorDescriptor_t shape_desc_ = nullptr; + +}; // class CudnnLRN } // namespcae #endif // USE_CUDNN http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_pooling.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_pooling.h b/src/model/layer/cudnn_pooling.h index c3c7060..c323222 100644 --- a/src/model/layer/cudnn_pooling.h +++ b/src/model/layer/cudnn_pooling.h @@ -41,7 +41,7 @@ class CudnnPooling : public Pooling { const Tensor Forward(int flag, const Tensor &input) override; const std::pair<Tensor, vector<Tensor>> Backward(int flag, const Tensor &grad) override; - + private: /// Init cudnn related data structures. 
void InitCudnn(const Tensor& input); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_softmax.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_softmax.cc b/src/model/layer/cudnn_softmax.cc index 16d4022..7efc797 100644 --- a/src/model/layer/cudnn_softmax.cc +++ b/src/model/layer/cudnn_softmax.cc @@ -26,30 +26,49 @@ CudnnSoftmax::~CudnnSoftmax() { if (desc_ != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(desc_)); } -void CudnnSoftmax::InitCudnn(size_t size, DataType dtype) { +void CudnnSoftmax::Setup(const Shape& in_sample, const LayerConf &conf) { + Softmax::Setup(in_sample, conf); + SoftmaxConf sft_conf = conf.softmax_conf(); + std::string algorithm = sft_conf.algorithm(); + CHECK(algorithm == "accurate" || algorithm == "fast" || algorithm == "log") + << "CudnnSoftmax only supports three algorithm preferences: " + << "accurate, fast and log."; + if (algorithm == "accurate") + algorithm_ = CUDNN_SOFTMAX_ACCURATE; + else if (algorithm == "fast") + algorithm_ = CUDNN_SOFTMAX_FAST; + else algorithm_ = CUDNN_SOFTMAX_LOG; +} + +void CudnnSoftmax::InitCudnn(Shape shape, DataType dtype) { CHECK(!has_init_cudnn_); CUDNN_CHECK(cudnnCreateTensorDescriptor(&desc_)); - CUDNN_CHECK(cudnnSetTensor4dDescriptor( - desc_, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), 1, 1, 1, size)); - - algorithm_ = CUDNN_SOFTMAX_ACCURATE; - mode_ = CUDNN_SOFTMAX_MODE_INSTANCE; + CHECK_LE(shape.size(), 2u) + << "Tensor shape should range from 1 to 2D;" + << "otherwise, add flatten layer to transform"; + if (shape.size() == 1u) + CUDNN_CHECK(cudnnSetTensor4dDescriptor( desc_, + CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), 1, shape[0], 1, 1)); + else + CUDNN_CHECK(cudnnSetTensor4dDescriptor( desc_, CUDNN_TENSOR_NCHW, + GetCudnnDataType(dtype), shape[0], shape[1], 1, 1)); has_init_cudnn_ = true; } const Tensor CudnnSoftmax::Forward(int flag, const Tensor& input) { - auto size = input.Size(); + auto shape = input.shape(); DataType dtype = input.data_type(); if (!has_init_cudnn_) { - InitCudnn(size, dtype); + InitCudnn(shape, dtype); } Tensor output; output.ResetLike(input); output.device()->Exec([input, output, this](Context* ctx) { Block* inblock = input.block(), * outblock = output.block(); float alpha = 1.0f, beta = 0.0f; - cudnnSoftmaxForward(ctx->cudnn_handle, this->algorithm_, this->mode_, + cudnnSoftmaxForward(ctx->cudnn_handle, this->algorithm_, + CUDNN_SOFTMAX_MODE_INSTANCE, &alpha, this->desc_, inblock->data(), &beta, this->desc_, outblock->mutable_data()); }, {input.block()}, {output.block()}); @@ -68,7 +87,8 @@ const std::pair<Tensor, vector<Tensor>> CudnnSoftmax::Backward( Block* dyblock = grad.block(), * dxblock = dx.block(), * yblock = output.block(); float alpha = 1.0f, beta = 0.0f; - cudnnSoftmaxBackward(ctx->cudnn_handle, this->algorithm_, this->mode_, + cudnnSoftmaxBackward(ctx->cudnn_handle, this->algorithm_, + CUDNN_SOFTMAX_MODE_INSTANCE, &alpha, this->desc_, yblock->data(), this->desc_, dyblock->data(), &beta, this->desc_, dxblock->mutable_data()); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/cudnn_softmax.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_softmax.h b/src/model/layer/cudnn_softmax.h index ee92d6f..aca3729 100644 --- a/src/model/layer/cudnn_softmax.h +++ b/src/model/layer/cudnn_softmax.h @@ -36,18 +36,23 @@ class CudnnSoftmax : public Softmax { /// \copydoc Layer::layer_type() const 
std::string layer_type() const override { return "CudnnSoftmax"; } + /// \copydoc Layer::Setup(const LayerConf&); + void Setup(const Shape& in_sample_shape, const LayerConf &conf) override; + const Tensor Forward(int flag, const Tensor& input) override; const std::pair<Tensor, vector<Tensor>> Backward(int flag, const Tensor& grad) override; + const cudnnSoftmaxAlgorithm_t Algorithm() const { return algorithm_; } + + private: /// Init cudnn related data structures. - void InitCudnn(size_t size, DataType dtype); + void InitCudnn(Shape shape, DataType dtype); private: bool has_init_cudnn_ = false; - cudnnTensorDescriptor_t desc_; + cudnnTensorDescriptor_t desc_ = nullptr; cudnnSoftmaxAlgorithm_t algorithm_; - cudnnSoftmaxMode_t mode_; }; } // namespace #endif // USE_CUDNN http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/dense.h ---------------------------------------------------------------------- diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h index 6704106..8438d5c 100644 --- a/src/model/layer/dense.h +++ b/src/model/layer/dense.h @@ -32,7 +32,7 @@ class Dense : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(hdim_) << "You may haven't call Setup()"; return vector<size_t>{hdim_}; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/dropout.h ---------------------------------------------------------------------- diff --git a/src/model/layer/dropout.h b/src/model/layer/dropout.h index e9ff798..14be6a0 100644 --- a/src/model/layer/dropout.h +++ b/src/model/layer/dropout.h @@ -30,7 +30,7 @@ class Dropout : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/flatten.h ---------------------------------------------------------------------- diff --git a/src/model/layer/flatten.h b/src/model/layer/flatten.h index 0981f32..6ac90c2 100644 --- a/src/model/layer/flatten.h +++ b/src/model/layer/flatten.h @@ -30,7 +30,7 @@ class Flatten : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/lrn.h ---------------------------------------------------------------------- diff --git a/src/model/layer/lrn.h b/src/model/layer/lrn.h index a165d12..0632f8c 100644 --- a/src/model/layer/lrn.h +++ b/src/model/layer/lrn.h @@ -33,7 +33,7 @@ class LRN : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/pooling.h 
---------------------------------------------------------------------- diff --git a/src/model/layer/pooling.h b/src/model/layer/pooling.h index ddee45b..26a1d07 100644 --- a/src/model/layer/pooling.h +++ b/src/model/layer/pooling.h @@ -31,7 +31,7 @@ class Pooling : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/prelu.h ---------------------------------------------------------------------- diff --git a/src/model/layer/prelu.h b/src/model/layer/prelu.h index 7387bfb..ee571e1 100644 --- a/src/model/layer/prelu.h +++ b/src/model/layer/prelu.h @@ -32,7 +32,7 @@ class PReLU : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/softmax.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc index 25bb9fe..cccb06b 100644 --- a/src/model/layer/softmax.cc +++ b/src/model/layer/softmax.cc @@ -21,8 +21,6 @@ namespace singa { void Softmax::Setup(const Shape& in_sample, const LayerConf& conf) { Layer::Setup(in_sample, conf); - // TODO(wangwei) disable axis, use a flatten layer to reshape the tensor. 
- // axis_ = conf.softmax_conf().axis(); // default is 1 CHECK_EQ(in_sample.size(), 1u); out_sample_shape_ = in_sample; } @@ -30,11 +28,6 @@ void Softmax::Setup(const Shape& in_sample, const LayerConf& conf) { const Tensor Softmax::Forward(int flag, const Tensor& input) { CHECK_LE(input.nDim(), 2u); Tensor output = SoftMax(input); - /* - size_t nrow = Product(input.shape(), 0, axis_); - const Tensor& tmp = Reshape(input, Shape{nrow, input.Size() / nrow}); - output = SoftMax(tmp); - */ if (flag & kTrain) buf_.push(output); return output; @@ -43,19 +36,21 @@ const Tensor Softmax::Forward(int flag, const Tensor& input) { const std::pair<Tensor, vector<Tensor>> Softmax::Backward(int flag, const Tensor& grad) { CHECK_LE(grad.nDim(), 2u); - size_t nrow = 1, ncol = grad.Size(); Tensor input_grad = grad.Clone(); + CHECK(!buf_.empty()); + Tensor y = buf_.top(); + buf_.pop(); + CHECK(y.shape() == input_grad.shape()); + Tensor sigma = input_grad * y; + + size_t nrow = 1, ncol = grad.Size(); if (grad.nDim() > 1) { nrow = grad.shape(0); ncol = grad.shape(1); } else { input_grad.Reshape({nrow, ncol}); + sigma.Reshape({nrow, ncol}); } - CHECK(!buf_.empty()); - Tensor y = buf_.top(); - buf_.pop(); - CHECK(y.shape() == input_grad.shape()); - Tensor sigma = input_grad * y; Tensor sum(Shape{nrow}, grad.device(), grad.data_type()); SumColumns(sigma, &sum); // dL / dy_i = grad_i @@ -65,6 +60,8 @@ const std::pair<Tensor, vector<Tensor>> Softmax::Backward(int flag, // dL / dx_i = y_i * (grad_i - sum), where sum = sum_i(grad_i * y_i); SubColumn(sum, &input_grad); input_grad = input_grad * y; + if (grad.nDim() == 1) + input_grad.Reshape(Shape{ncol}); // Mult(input_grad, y, &input_grad); vector<Tensor> param_grad; return std::make_pair(input_grad, param_grad); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/model/layer/softmax.h ---------------------------------------------------------------------- diff --git a/src/model/layer/softmax.h b/src/model/layer/softmax.h index fed544e..837b23a 100644 --- a/src/model/layer/softmax.h +++ b/src/model/layer/softmax.h @@ -20,6 +20,7 @@ #include "singa/model/layer.h" #include <stack> namespace singa { +/// Do softmax for 1D or 2D tensors along the last dimension. class Softmax : public Layer { public: /// \copydoc Layer::layer_type() @@ -27,7 +28,7 @@ class Softmax : public Layer { /// \copydoc Layer::Setup(const LayerConf&); void Setup(const Shape& in_sample, const LayerConf& conf) override; - const Shape GetOutputSampleShape() { + const Shape GetOutputSampleShape() const override { CHECK(out_sample_shape_.size()) << "You may haven't call Setup()"; return out_sample_shape_; } @@ -39,10 +40,7 @@ class Softmax : public Layer { const std::pair<Tensor, vector<Tensor>> Backward(int flag, const Tensor& grad) override; - const int Axis() const { return axis_; } - protected: - int axis_; std::stack<Tensor> buf_; Shape out_sample_shape_; }; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/src/proto/model.proto ---------------------------------------------------------------------- diff --git a/src/proto/model.proto b/src/proto/model.proto index e9746c1..c06deec 100644 --- a/src/proto/model.proto +++ b/src/proto/model.proto @@ -829,6 +829,10 @@ message SoftmaxConf { // from the end (e.g., -1 for the last axis). // Any other axes will be evaluated as independent softmaxes. 
// optional int32 axis = 2 [default = 1]; + + /// The cudnn algorithm preferences + /// Options are: accurate, fast and log + optional string algorithm = 50 [default = "accurate"]; } message TanHConf { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/test/singa/test_cudnn_activation.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_cudnn_activation.cc b/test/singa/test_cudnn_activation.cc index 0dac497..da8ec62 100644 --- a/test/singa/test_cudnn_activation.cc +++ b/test/singa/test_cudnn_activation.cc @@ -39,8 +39,7 @@ TEST(TCudnnActivation, Setup) { reluconf->set_negative_slope(0.5f); acti.Setup(Shape{3}, conf); - acti.InitCudnn(1, singa::kFloat32); - EXPECT_EQ(CUDNN_ACTIVATION_RELU, acti.CudnnMode()); +// EXPECT_EQ(CUDNN_ACTIVATION_RELU, acti.CudnnMode()); EXPECT_EQ(0.5f, acti.Negative_slope()); } @@ -63,7 +62,6 @@ TEST(TCudnnActivation, Forward) { reluconf->set_negative_slope(neg_slope); } acti.Setup(Shape{n}, conf); - // acti.InitCudnn(n, singa::kFloat32); singa::Tensor out = acti.Forward(singa::kTrain, in); EXPECT_EQ(n, out.Size()); @@ -103,7 +101,6 @@ TEST(TCudnnActivation, Backward) { reluconf->set_negative_slope(neg_slope); } acti.Setup(Shape{n}, conf); - acti.InitCudnn(n, singa::kFloat32); singa::Tensor out = acti.Forward(singa::kTrain, in); EXPECT_EQ(n, out.Size()); singa::CppCPU host(0, 1); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b91002b5/test/singa/test_cudnn_softmax.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_cudnn_softmax.cc b/test/singa/test_cudnn_softmax.cc index d671ecf..067491f 100644 --- a/test/singa/test_cudnn_softmax.cc +++ b/test/singa/test_cudnn_softmax.cc @@ -34,23 +34,25 @@ TEST(CudnnSoftmax, Setup) { EXPECT_EQ("CudnnSoftmax", sft.layer_type()); singa::LayerConf conf; - - sft.Setup(Shape{4}, conf); - sft.InitCudnn(1, singa::kFloat32); + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("fast"); + sft.Setup(Shape{1}, conf); + EXPECT_EQ(CUDNN_SOFTMAX_FAST, sft.Algorithm()); } -TEST(CudnnSoftmax, Forward) { - const float x[] = {1.0f, 2.0f, 0.0f, -2.0f, -3.0f, -1.0}; +TEST(CudnnSoftmax, Forward1D) { + const float x[] = {1.f, 2.f, 0.f, -2.f, -3.f, -1.f}; size_t n = sizeof(x) / sizeof(float); singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{n}, &cuda); + singa::Shape shape = {n}; + singa::Tensor in(shape, &cuda); in.CopyDataFromHostPtr<float>(x, n); CudnnSoftmax sft; singa::LayerConf conf; + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("accurate"); sft.Setup(Shape{1}, conf); - sft.InitCudnn(n, singa::kFloat32); - singa::Tensor out = sft.Forward(singa::kTrain, in); singa::CppCPU host(0, 1); out.ToDevice(&host); @@ -61,28 +63,30 @@ TEST(CudnnSoftmax, Forward) { float sigma = 0.f; for (size_t i = 0; i < n; i++) sigma += exp(x[i]); for (size_t i = 0; i < n; i++) y[i] = exp(x[i]) / sigma; - EXPECT_FLOAT_EQ(y[0], yptr[0]); - EXPECT_FLOAT_EQ(y[4], yptr[4]); - EXPECT_FLOAT_EQ(y[5], yptr[5]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); } -TEST(CudnnSoftmax, Backward) { - const float x[] = {1.0f, 2.0f, 3.0f, -2.0f, -3.0f, -1.0}; +TEST(CudnnSoftmax, Backward1D) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f}; size_t n = sizeof(x) / sizeof(float); singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape{n}, &cuda); + singa::Shape shape = {n}; + singa::Tensor in(shape, &cuda); 
in.CopyDataFromHostPtr<float>(x, n); CudnnSoftmax sft; singa::LayerConf conf; + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("accurate"); sft.Setup(Shape{1}, conf); + singa::Tensor out = sft.Forward(singa::kTrain, in); singa::CppCPU host(0, 1); out.ToDevice(&host); const float* yptr = out.data<const float*>(); - const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0}; - singa::Tensor out_diff(singa::Shape{n}, &cuda); + const float grad[] = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f}; + singa::Tensor out_diff(shape, &cuda); out_diff.CopyDataFromHostPtr<float>(grad, n); const auto ret = sft.Backward(singa::kTrain, out_diff); singa::Tensor in_diff = ret.first; @@ -93,8 +97,71 @@ TEST(CudnnSoftmax, Backward) { float sigma = 0.f; for (size_t i = 0; i < n; i++) sigma += grad[i] * yptr[i]; for (size_t i = 0; i < n; i++) dx[i] = (grad[i] - sigma) * yptr[i]; - EXPECT_FLOAT_EQ(dx[0], xptr[0]); - EXPECT_FLOAT_EQ(dx[4], xptr[4]); - EXPECT_FLOAT_EQ(dx[5], xptr[5]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); +} + +TEST(CudnnSoftmax, Forward2D) { + const float x[] = {1.f, 2.f, 0.f, -2.f, -3.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batch = 2, c = 3; + singa::CudaGPU cuda(0, 1); + singa::Shape shape = {batch, c}; + singa::Tensor in(shape, &cuda); + in.CopyDataFromHostPtr<float>(x, n); + + CudnnSoftmax sft; + singa::LayerConf conf; + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("accurate"); + sft.Setup(Shape{c}, conf); + + singa::Tensor out = sft.Forward(singa::kTrain, in); + singa::CppCPU host(0, 1); + out.ToDevice(&host); + const float* yptr = out.data<const float*>(); + EXPECT_EQ(n, out.Size()); + + float* y = new float[n]; + float* sigma = new float[batch]; + for (size_t i = 0; i < batch; i++) sigma[i] = 0.f; + for (size_t i = 0; i < n; i++) sigma[i / c] += exp(x[i]); + for (size_t i = 0; i < n; i++) y[i] = exp(x[i]) / sigma[i / c]; + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); +} + +TEST(CudnnSoftmax, Backward2D) { + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f}; + size_t n = sizeof(x) / sizeof(float); + size_t batch = 2, c = 3; + singa::CudaGPU cuda(0, 1); + singa::Shape shape = {batch, c}; + singa::Tensor in(shape, &cuda); + in.CopyDataFromHostPtr<float>(x, n); + + CudnnSoftmax sft; + singa::LayerConf conf; + singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf(); + softmaxconf->set_algorithm("accurate"); + sft.Setup(Shape{c}, conf); + + singa::Tensor out = sft.Forward(singa::kTrain, in); + singa::CppCPU host(0, 1); + out.ToDevice(&host); + const float* yptr = out.data<const float*>(); + + const float grad[] = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f}; + singa::Tensor out_diff(shape, &cuda); + out_diff.CopyDataFromHostPtr<float>(grad, n); + const auto ret = sft.Backward(singa::kTrain, out_diff); + singa::Tensor in_diff = ret.first; + in_diff.ToDevice(&host); + const float* xptr = in_diff.data<const float*>(); + + float* dx = new float[n]; + float* sigma = new float[batch]; + for (size_t i = 0; i < batch; i++) sigma[i] = 0.f; + for (size_t i = 0; i < n; i++) sigma[i / c] += grad[i] * yptr[i]; + for (size_t i = 0; i < n; i++) dx[i] = (grad[i] - sigma[i / c]) * yptr[i]; + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); } #endif // USE_CUDNN
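
Note on the cudnn_softmax.cc change above: a 1D input of length c is described to cuDNN as an NCHW tensor of shape (1, c, 1, 1) and a 2D input of shape (batch, c) as (batch, c, 1, 1), so CUDNN_SOFTMAX_MODE_INSTANCE normalizes each sample independently; the new SoftmaxConf.algorithm strings "accurate", "fast" and "log" select CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_FAST and CUDNN_SOFTMAX_LOG. The standalone sketch below is not part of the patch; it assumes CUDA and cuDNN are installed, uses made-up input values, and omits error checking, but shows the same descriptor setup outside SINGA.

// Standalone sketch (not SINGA code): run cuDNN softmax over a 2 x 3
// input described as NCHW with n=2, c=3, h=w=1, mirroring the descriptor
// setup in CudnnSoftmax::InitCudnn for a 2D tensor.
#include <cudnn.h>
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  const int n = 2, c = 3;  // batch size and feature dimension
  const float x[n * c] = {1.f, 2.f, 0.f, -2.f, -3.f, -1.f};

  cudnnHandle_t handle;
  cudnnCreate(&handle);

  cudnnTensorDescriptor_t desc;
  cudnnCreateTensorDescriptor(&desc);
  // h = w = 1, so CUDNN_SOFTMAX_MODE_INSTANCE normalizes over the c
  // values of each sample (each row of the 2D tensor).
  cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                             n, c, 1, 1);

  float *dev_x = nullptr, *dev_y = nullptr;
  cudaMalloc(&dev_x, sizeof(x));
  cudaMalloc(&dev_y, sizeof(x));
  cudaMemcpy(dev_x, x, sizeof(x), cudaMemcpyHostToDevice);

  // "accurate" -> CUDNN_SOFTMAX_ACCURATE; "fast" and "log" would map to
  // CUDNN_SOFTMAX_FAST and CUDNN_SOFTMAX_LOG respectively.
  const float alpha = 1.f, beta = 0.f;
  cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE,
                      CUDNN_SOFTMAX_MODE_INSTANCE, &alpha, desc, dev_x,
                      &beta, desc, dev_y);

  float y[n * c];
  cudaMemcpy(y, dev_y, sizeof(y), cudaMemcpyDeviceToHost);
  for (int i = 0; i < n; i++) {
    float sum = 0.f;
    for (int j = 0; j < c; j++) sum += y[i * c + j];
    printf("row %d sums to %f\n", i, sum);  // each row should sum to ~1
  }

  cudaFree(dev_x);
  cudaFree(dev_y);
  cudnnDestroyTensorDescriptor(desc);
  cudnnDestroy(handle);
  return 0;
}

Something like "nvcc sketch.cc -lcudnn" should build it on a standard CUDA/cuDNN install.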
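
For reference, the rule implemented by Softmax::Backward above, and recomputed on the host by the updated tests, is dL/dx_i = y_i * (dL/dy_i - sum_j dL/dy_j * y_j). A minimal CPU-only sketch, independent of SINGA and reusing the sample values from the 1D test:

// Standalone sketch (not SINGA code): softmax forward and the
// Jacobian-vector product used in Softmax::Backward, for one row.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<float> x = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f};   // input
  const std::vector<float> dy = {2.f, -3.f, 1.f, 3.f, -1.f, -2.f};  // dL/dy
  const size_t n = x.size();

  // Forward: y_i = exp(x_i) / sum_j exp(x_j).
  std::vector<float> y(n);
  float denom = 0.f;
  for (size_t i = 0; i < n; i++) denom += std::exp(x[i]);
  for (size_t i = 0; i < n; i++) y[i] = std::exp(x[i]) / denom;

  // Backward: sigma = sum_j dy_j * y_j, then dx_i = (dy_i - sigma) * y_i.
  float sigma = 0.f;
  for (size_t i = 0; i < n; i++) sigma += dy[i] * y[i];
  std::vector<float> dx(n);
  for (size_t i = 0; i < n; i++) dx[i] = (dy[i] - sigma) * y[i];

  for (size_t i = 0; i < n; i++) printf("dx[%zu] = %f\n", i, dx[i]);
  return 0;
}

For a 2D input, the same computation is applied row by row, which is what the Forward2D/Backward2D tests check against the cuDNN results.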
