SINGA-174 Add Batch Normalization layer and Local Response Normalization layer.
Add batch normalization layer implementation in C++.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/96ed638b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/96ed638b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/96ed638b

Branch: refs/heads/dev
Commit: 96ed638b7b7c1fa66072aaf94f97804b634bdcb3
Parents: f07e354
Author: WANG Ji <[email protected]>
Authored: Thu Jul 28 21:53:43 2016 +0800
Committer: WANG Ji <[email protected]>
Committed: Wed Aug 3 16:49:13 2016 +0800

----------------------------------------------------------------------
 src/model/layer/batchnorm.cc | 120 ++++++++++++++++++++++++++++++++--
 test/singa/test_batchnorm.cc | 132 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 248 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96ed638b/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index e583f8e..b6edc9e 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -39,7 +39,7 @@ void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   dbnBias_.ResetLike(bnBias_);
   // Push back params into param_values_
   // Assume the order of param is: bnScale, bnBias, runningMean, runningVariance
-  for (const auto &spec : conf.param()) param_specs_.push_back(spec);
+  for (const auto& spec : conf.param()) param_specs_.push_back(spec);
 }
 
 void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
@@ -52,16 +52,128 @@ void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
 }
 
 const Tensor BatchNorm::Forward(int flag, const Tensor& input) {
-  LOG(FATAL) << "Not implemented";
-  Tensor output;
+  Tensor x = input.Clone();
+  x.Reshape(Shape{input.shape(0), input.Size() / input.shape(0)});
+  Tensor output, mean, var, xnorm;
+  output.ResetLike(x);
+
+  if ((flag & kTrain) == kTrain) {
+    mean = Average(x, 0);
+    runningMean_ *= 1.0f - factor_;
+    Axpy(factor_, mean, &runningMean_);
+    xnorm = x.Clone();
+    SubRow(mean, &xnorm);
+    xnorm = Square(xnorm);
+    var = Average(xnorm, 0);
+    runningVariance_ *= 1.0f - factor_;
+    Axpy(factor_, var, &runningVariance_);
+    Tensor tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Sqrt(tmp);
+    xnorm = x.Clone();
+    SubRow(mean, &xnorm);
+    DivRow(tmp, &xnorm);
+    output = xnorm.Clone();
+    MultRow(bnScale_, &output);
+    AddRow(bnBias_, &output);
+    buf_.push(x);
+    buf_.push(mean);
+    buf_.push(var);
+    buf_.push(xnorm);
+  } else {
+    xnorm = x.Clone();
+    SubRow(runningMean_, &xnorm);
+    Tensor tmp = runningVariance_.Clone();
+    tmp += 1e-6f;
+    tmp = Sqrt(tmp);
+    DivRow(tmp, &xnorm);
+    output = xnorm.Clone();
+    MultRow(bnScale_, &output);
+    AddRow(bnBias_, &output);
+  }
+
+  output.Reshape(Shape{output.shape(0), channels_, height_, width_});
   return output;
 }
 
 const std::pair<Tensor, vector<Tensor>> BatchNorm::Backward(
     int flag, const Tensor& grad) {
-  LOG(FATAL) << "Not implemented";
+  Tensor dy = grad.Clone();
+  dy.Reshape(Shape{grad.shape(0), grad.Size() / grad.shape(0)});
+  Tensor xnorm = buf_.top();
+  buf_.pop();
+  Tensor var = buf_.top();
+  buf_.pop();
+  Tensor mean = buf_.top();
+  buf_.pop();
+  Tensor input = buf_.top();
+  buf_.pop();
+
   Tensor dx;
   vector<Tensor> param_grad;
+
+  if ((flag & kTrain) == kTrain) {
+    // gxnrom
+    Tensor gxnorm = dy.Clone();
+    MultRow(bnScale_, &gxnorm);
+    // gvar
+    Tensor tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Pow(var, -1.5f);
+    tmp *= -0.5f;
+
+    Tensor tmpx = input.Clone();
+    SubRow(mean, &tmpx);
+
+    tmpx = tmpx * gxnorm;
+    MultRow(tmp, &tmpx);
+    Tensor gvar;
+    gvar.ResetLike(var);
+    SumRows(tmpx, &gvar);
+    // gmean
+    tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Pow(tmp, -0.5f);
+    tmp *= -1.0f;
+    Tensor tmpx_r;
+    tmpx_r.ResetLike(tmp);
+    SumRows(gxnorm, &tmpx_r);
+    Tensor gmean = tmpx_r * tmp;
+
+    tmpx = input.Clone();
+    SubRow(mean, &tmpx);
+    SumRows(tmpx, &tmp);
+    tmp *= -2.0f / input.shape(0);
+    tmp = tmp * gvar;
+    gmean = gmean + tmp;
+    // dx
+    tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Pow(tmp, -0.5f);
+    dx = gxnorm.Clone();
+    MultRow(tmp, &dx);
+
+    tmpx = input.Clone();
+    SubRow(mean, &tmpx);
+    tmpx *= 2.0f / input.shape(0);
+    MultRow(gvar, &tmpx);
+    dx = dx + tmpx;
+
+    tmp = gmean.Clone();
+    tmp *= 1.0f / input.shape(0);
+
+    AddRow(tmp, &dx);
+    // dbnScale
+    tmpx = dy * xnorm;
+    SumRows(tmpx, &dbnScale_);
+    // dbnBias
+    SumRows(dy, &dbnBias_);
+    param_grad.push_back(dbnScale_);
+    param_grad.push_back(dbnBias_);
+  } else {
+    LOG(ERROR) << "Do not call backward for evaluation phase";
+  }
+  dx.Reshape(Shape{dx.shape(0), channels_, height_, width_});
   return std::make_pair(dx, param_grad);
 }


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96ed638b/test/singa/test_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_batchnorm.cc b/test/singa/test_batchnorm.cc
new file mode 100644
index 0000000..c72dc0f
--- /dev/null
+++ b/test/singa/test_batchnorm.cc
@@ -0,0 +1,132 @@
+/*********************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+************************************************************/
+
+#include "../src/model/layer/batchnorm.h"
+#include "gtest/gtest.h"
+#include <iostream>
+
+using namespace singa;
+
+TEST(BatchNorm, Setup) {
+  BatchNorm batchnorm;
+  EXPECT_EQ("BatchNorm", batchnorm.layer_type());
+
+  singa::LayerConf conf;
+  singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+  batchnorm_conf->set_factor(0.01);
+  batchnorm.Setup(Shape{2, 4, 4}, conf);
+
+  EXPECT_FLOAT_EQ(0.01, batchnorm.factor());
+  EXPECT_EQ(2u, batchnorm.channels());
+  EXPECT_EQ(4u, batchnorm.height());
+  EXPECT_EQ(4u, batchnorm.width());
+}
+
+TEST(BatchNorm, Forward) {
+  BatchNorm batchnorm;
+  const float x[] = {1, 2, 3, 4};
+  Tensor in(Shape{2, 1, 2, 1});
+  in.CopyDataFromHostPtr(x, 2 * 1 * 2 * 1);
+  const float alpha_[] = {1, 1};
+  Tensor alpha(Shape{1, 2});
+  alpha.CopyDataFromHostPtr(alpha_, 1 * 2);
+
+  const float beta_[] = {2, 2};
+  Tensor beta(Shape{1, 2});
+  beta.CopyDataFromHostPtr(beta_, 1 * 2);
+  singa::LayerConf conf;
+  singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+  batchnorm_conf->set_factor(1);
+  batchnorm.Setup(Shape{1, 2, 1}, conf);
+  batchnorm.set_bnScale(alpha);
+  batchnorm.set_bnBias(beta);
+  batchnorm.set_runningMean(beta);
+  batchnorm.set_runningVariance(beta);
+  Tensor out = batchnorm.Forward(kTrain, in);
+  const float *outptr = out.data<float>();
+  const auto &shape = out.shape();
+  EXPECT_EQ(4u, shape.size());
+  EXPECT_EQ(2u, shape[0]);
+  EXPECT_EQ(1u, shape[1]);
+  EXPECT_EQ(2u, shape[2]);
+  EXPECT_EQ(1u, shape[3]);
+  EXPECT_NEAR(1.0f, outptr[0], 1e-6f);
+  EXPECT_NEAR(1.0f, outptr[1], 1e-6f);
+  EXPECT_NEAR(3.0f, outptr[2], 1e-6f);
+  EXPECT_NEAR(3.0f, outptr[3], 1e-6f);
+}
+
+TEST(BatchNorm, Backward) {
+  BatchNorm batchnorm;
+  const float x[] = {1, 2, 3, 4};
+  Tensor in(Shape{2, 1, 2, 1});
+  in.CopyDataFromHostPtr(x, 2 * 1 * 2 * 1);
+  const float dy[] = {4, 3, 2, 1};
+  Tensor dy_in(Shape{2, 1, 2, 1});
+  dy_in.CopyDataFromHostPtr(dy, 2 * 1 * 2 * 1);
+  const float alpha_[] = {1, 1};
+  Tensor alpha(Shape{1, 2});
+  alpha.CopyDataFromHostPtr(alpha_, 1 * 2);
+
+  const float beta_[] = {0, 0};
+  Tensor beta(Shape{1, 2});
+  beta.CopyDataFromHostPtr(beta_, 1 * 2);
+  singa::LayerConf conf;
+  singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+  batchnorm_conf->set_factor(1);
+  batchnorm.Setup(Shape{1, 2, 1}, conf);
+  batchnorm.set_bnScale(alpha);
+  batchnorm.set_bnBias(beta);
+  batchnorm.set_runningMean(beta);
+  batchnorm.set_runningVariance(beta);
+  Tensor out = batchnorm.Forward(kTrain, in);
+  auto ret = batchnorm.Backward(kTrain, dy_in);
+  Tensor dx = ret.first;
+  const auto & shape = dx.shape();
+  EXPECT_EQ(4u, shape.size());
+  EXPECT_EQ(2u, shape[0]);
+  EXPECT_EQ(1u, shape[1]);
+  EXPECT_EQ(2u, shape[2]);
+  EXPECT_EQ(1u, shape[3]);
+  const float *dxptr = ret.first.data<float>();
+  EXPECT_NEAR(.0f, dxptr[0], 1e-6f);
+  EXPECT_NEAR(.0f, dxptr[1], 1e-6f);
+  EXPECT_NEAR(.0f, dxptr[2], 1e-6f);
+  EXPECT_NEAR(.0f, dxptr[3], 1e-6f);
+
+  Tensor dbnScale = ret.second.at(0);
+  const float *dbnScaleptr = dbnScale.data<float>();
+  const auto & dbnScaleShape = dbnScale.shape();
+  EXPECT_EQ(1u, dbnScaleShape.size());
+  EXPECT_EQ(2u, dbnScaleShape[0]);
+
+  EXPECT_NEAR(-2.0f, dbnScaleptr[0], 1e-6f);
+  EXPECT_NEAR(-2.0f, dbnScaleptr[1], 1e-6f);
+
+  Tensor dbnBias = ret.second.at(1);
+  const float *dbnBiasptr = dbnBias.data<float>();
+  const auto & dbnBiasShape = dbnBias.shape();
+  EXPECT_EQ(1u, dbnBiasShape.size());
+  EXPECT_EQ(2u, dbnBiasShape[0]);
+
+  EXPECT_NEAR(6.0f, dbnBiasptr[0], 1e-6f);
+  EXPECT_NEAR(4.0f, dbnBiasptr[1], 1e-6f);
+}
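
For reference, the expected values in test_batchnorm.cc can be re-derived by hand from the standard batch-normalization formulas that this patch implements: y = scale * (x - mean) / sqrt(var + eps) + bias with eps = 1e-6, dbnScale = sum over the batch of dy * xnorm, and dbnBias = sum over the batch of dy. The standalone C++ sketch below is illustrative only (it is not part of the patch, and its file and variable names are made up); it flattens the 2x1x2x1 test input into a 2x2 matrix the same way BatchNorm::Forward reshapes it and recomputes the numbers asserted by the tests.

// verify_batchnorm_test.cc -- illustrative sketch, independent of SINGA.
#include <cmath>
#include <cstdio>

int main() {
  const int N = 2, C = 2;              // 2 samples, 2 flattened features
  float x[N][C] = {{1, 2}, {3, 4}};    // input used in the Forward/Backward tests
  float dy[N][C] = {{4, 3}, {2, 1}};   // top gradient from the Backward test
  float scale[C] = {1, 1};             // bnScale in both tests
  float bias[C] = {2, 2};              // bnBias in the Forward test (0 in the Backward test)
  const float eps = 1e-6f;

  for (int c = 0; c < C; ++c) {
    // Per-feature mean and biased variance over the batch, as Average(x, 0) computes them.
    float mean = 0, var = 0;
    for (int n = 0; n < N; ++n) mean += x[n][c] / N;
    for (int n = 0; n < N; ++n) var += (x[n][c] - mean) * (x[n][c] - mean) / N;

    float dscale = 0, dbias = 0;
    for (int n = 0; n < N; ++n) {
      float xnorm = (x[n][c] - mean) / std::sqrt(var + eps);
      float y = scale[c] * xnorm + bias[c];
      std::printf("y[%d][%d] = %g\n", n, c, y);  // 1 and 3, matching the Forward test
      dscale += dy[n][c] * xnorm;                // the bias term does not enter the gradients
      dbias += dy[n][c];
    }
    std::printf("dbnScale[%d] = %g, dbnBias[%d] = %g\n", c, dscale, c, dbias);
  }
  return 0;
}

With these inputs the sketch yields outputs {1, 1, 3, 3}, dbnScale = {-2, -2} and dbnBias = {6, 4}, which are the values checked by the EXPECT_NEAR assertions above.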
