This is an automated email from the ASF dual-hosted git repository.

taolv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 37bed6e  Fix BatchNorm backward synchronization (#18644)
37bed6e is described below

commit 37bed6e3af794624d651e888101eceb30c27c001
Author: Andrzej Kotłowski <andrzej.kotlow...@intel.com>
AuthorDate: Wed Jul 1 16:39:22 2020 +0200

    Fix BatchNorm backward synchronization (#18644)

    * Add test for BatchNorm running variables synchronization

    * Fix BatchNorm backward synchronization

      It fixes issue #18610
---
 src/operator/nn/batch_norm.cc       |  3 +++
 tests/python/unittest/test_gluon.py | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 8dbd271..7e540ca 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -653,6 +653,9 @@ then set ``gamma`` to 1 and its gradient to 0.
 NNVM_REGISTER_OP(_backward_BatchNorm)
 .set_num_inputs(8)
 .set_num_outputs(3)
+.set_attr<nnvm::FMutateInputs>("FMutateInputs", [](const nnvm::NodeAttrs& attrs) {
+  return std::vector<uint32_t>{6, 7};  // moving_mean, moving_var
+})
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FInferStorageType>("FInferStorageType", BatchNormStorageType)
 #if MXNET_USE_MKLDNN == 1
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 47ef86f..77d5119 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -666,6 +666,32 @@ def test_pool():
 
 
 @with_seed()
+@pytest.mark.parametrize('variable', ['running_var', 'running_mean'])
+def test_batchnorm_backward_synchronization(variable):
+    """
+    Tests if synchronization of BatchNorm running variables is done correctly.
+    If not, the test sometimes fails, depending on the timing.
+    """
+    ctx = mx.test_utils.default_context()
+
+    for _ in range(20):
+        layer = nn.BatchNorm()
+        layer.initialize(ctx=ctx)
+        for _ in range(3):
+            data = mx.nd.random.normal(loc=10, scale=2, shape=(1, 3, 10, 10), ctx=ctx)
+            with mx.autograd.record():
+                out = layer(data)
+            out.backward()
+
+        # check if each read gives the same value
+        var1 = getattr(layer, variable).data().asnumpy()
+        for _ in range(10):
+            var2 = getattr(layer, variable).data().asnumpy()
+            if (var1 != var2).any():
+                raise AssertionError("Two consecutive reads of " + variable + " give different results")
+
+
+@with_seed()
 def test_batchnorm():
     layer = nn.BatchNorm(in_channels=10)
     check_layer_forward(layer, (2, 10, 10, 10))
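A note on the change for readers following along: the FMutateInputs attribute tells MXNet's dependency engine which of an operator's inputs are mutated in place (here inputs 6 and 7, the moving_mean and moving_var arrays), so that subsequent reads are ordered after the in-flight write. The sketch below is a minimal reproduction of the read-after-backward pattern the new test exercises; it is not part of the commit and assumes an MXNet 1.x build with Gluon available.

    import mxnet as mx
    from mxnet.gluon import nn

    layer = nn.BatchNorm()
    layer.initialize()

    data = mx.nd.random.normal(loc=10, scale=2, shape=(1, 3, 10, 10))
    with mx.autograd.record():
        out = layer(data)
    out.backward()

    # Before this commit, _backward_BatchNorm updated moving_mean/moving_var
    # without declaring them via FMutateInputs, so the engine did not order
    # these reads after the in-flight write; asnumpy() could then observe a
    # stale or partially updated value, depending on timing.
    print(layer.running_mean.data().asnumpy())
    print(layer.running_var.data().asnumpy())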