sammieghabra opened a new issue #18673: URL: https://github.com/apache/incubator-mxnet/issues/18673
## Description running mean and running var don't get updated when scale and center are false ### Error Message There is no error message, but the parameters `running_mean` and `running_var` don't get updated when `scale` and `center` are `False`. ## To Reproduce ``` from mxnet import gluon from mxnet.gluon import HybridBlock, Block from mxnet import initializer from mxnet.symbol import Variable, BlockGrad from mxnet.initializer import Constant import numpy as np class ShiftScaleLayer(HybridBlock): def __init__(self, axis=-1, momentum=0.9, epsilon=1e-5, center=False, scale=False, use_global_stats=False, beta_initializer='zeros', gamma_initializer='ones', running_mean_initializer='zeros', running_variance_initializer='ones', in_channels=0, **kwargs): super(ShiftScaleLayer, self).__init__(**kwargs) self._kwargs = {'axis': axis, 'eps': epsilon, 'momentum': momentum, 'fix_gamma': not scale, 'use_global_stats': use_global_stats} if in_channels != 0: self.in_channels = in_channels self.gamma = self.params.get('gamma', grad_req='write' if scale else 'null', shape=(in_channels,), init=gamma_initializer, allow_deferred_init=True, differentiable=scale) self.beta = self.params.get('beta', grad_req='write' if center else 'null', shape=(in_channels,), init=beta_initializer, allow_deferred_init=True, differentiable=center) self.running_mean = self.params.get('running_mean', grad_req='null', shape=(in_channels,), init=running_mean_initializer, allow_deferred_init=True, differentiable=False) self.running_var = self.params.get('running_var', grad_req='null', shape=(in_channels,), init=running_variance_initializer, allow_deferred_init=True, differentiable=False) def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var): return F.BatchNorm(x, gamma, beta, running_mean, running_var, name='fwd', **self._kwargs) def print_params(title, net): """ Helper function to print out the state of parameters of NormalizationHybridLayer """ print(title) hybridlayer_params = {k: v for k, 
v in net.collect_params().items() } for key, value in hybridlayer_params.items(): print('{} = {}\n'.format(key, value.data())) from mxnet.gluon import nn from mxnet.gluon.nn import Dense from mxnet import nd net = gluon.nn.HybridSequential() # Define a Neural Network as a sequence of hybrid blocks with net.name_scope(): # Used to disambiguate saving and loading net parameters net.add(ShiftScaleLayer()) net.add(Dense(10)) net.initialize(initializer.Xavier(magnitude=2.24)) # Initialize parameters of all layers net.hybridize() input = nd.array([[[2, 4], [6, 8], [10, 12], [14, 16], [18, 20]]]) label = nd.array([[[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]]]) mse_loss = gluon.loss.L2Loss() # Mean squared error between output and label trainer = gluon.Trainer(net.collect_params(), # Init trainer with Stochastic Gradient Descent (sgd) optimization method and parameters for it 'sgd', {'learning_rate': 0.1, 'momentum': 0.9 }) from mxnet import autograd with autograd.record(): # Autograd records computations done on NDArrays inside "with" block output = net(input) # Run forward propagation print_params("=========== Parameters after forward pass ===========\n", net) loss = mse_loss(output, label) print(output) loss.backward() # Backward computes gradients and stores them as a separate array within each NDArray in .grad field trainer.step(input.shape[0]) # Trainer updates parameters of every block, using .grad field using optimization method (sgd in this example) # We provide batch size that is used as a divider in cost function formula print_params("=========== Parameters after backward pass ===========\n", net) print(net(input)) ``` ### Steps to reproduce (Paste the commands you ran that produced the error.) 1. Run the python script from above 2. Observe that the ShiftScale layer's running mean and running var are not getting updated after backwards prop when scale and center are false. ## What have you tried to solve it? 
N/A ## Environment MXNet 1.6 # paste outputs here ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org