eric-haibin-lin commented on a change in pull request #11502: [MXNET-614] Adding Synchronized Batch Normalization
URL: https://github.com/apache/incubator-mxnet/pull/11502#discussion_r200207912
##########
File path: tests/python/gpu/test_operator_gpu.py
##########
@@ -1919,6 +1921,82 @@ def test_context_num_gpus():
    # Test that num_gpus reports at least one GPU, as the test is run on a GPU host.
    assert mx.context.num_gpus() > 0
+
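+# Compare a reference BatchNorm (bn1) against a SyncBatchNorm (bn2) whose batch
+# is split across num_devices contexts: forward outputs, running statistics,
+# and input gradients should all agree.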
+def _checkBatchNormResult(bn1, bn2, input, num_devices=1, cuda=False):
+    def _assert_tensor_close(a, b, atol=1e-3, rtol=1e-3):
+        npa, npb = a.asnumpy(), b.asnumpy()
+        assert np.allclose(npa, npb, rtol=rtol, atol=atol), \
+            'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(
+                a, b, np.abs(npa - npb).max(),
+                np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())
+
+    def _find_bn(module):
+        if isinstance(module, (nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)):
+            return module
+        elif isinstance(module.module, (nn.BatchNorm, mx.gluon.contrib.nn.SyncBatchNorm)):
+            return module.module
+        raise RuntimeError('BN not found')
+
+    def _syncParameters(bn1, bn2):
+        ctx = input.context
+        # copy all parameters from bn1 so both blocks start from identical state
+        bn2.gamma.set_data(bn1.gamma.data(ctx))
+        bn2.beta.set_data(bn1.beta.data(ctx))
+        bn2.running_mean.set_data(bn1.running_mean.data(ctx))
+        bn2.running_var.set_data(bn1.running_var.data(ctx))
+
+    input1 = input.copy()
+    input2 = input.copy()
+
+    if cuda:
+        input1 = input.as_in_context(mx.gpu(0))
+        ctx_list = [mx.gpu(0) for _ in range(num_devices)]
+    else:
+        ctx_list = [mx.cpu(0) for _ in range(num_devices)]
+
+    bn1.initialize(ctx=ctx_list[0])
+    bn2.initialize(ctx=ctx_list)
+
+    # using the same values for gamma and beta
+    #_syncParameters(_find_bn(bn1), _find_bn(bn2))
+
+    input1.attach_grad()
+    inputs2 = split_and_load(input2, ctx_list, batch_axis=0)
+    for xi in inputs2:
+        xi.attach_grad()
+
+    with mx.autograd.record():
+        output1 = bn1(input1)
+        output2 = [bn2(xi) for xi in inputs2]
+        # a sum-of-squares loss gives a simple, nonzero gradient to compare
+        loss1 = (output1 ** 2).sum()
+        loss2 = [(output ** 2).sum() for output in output2]
+        mx.autograd.backward(loss1)
+        mx.autograd.backward(loss2)
+
+    output2 = mx.nd.concat(*[output.as_in_context(input.context) for output in output2], dim=0)
+    # check that forward outputs and running statistics match
+    _assert_tensor_close(input1, input2)
+    _assert_tensor_close(output1, output2)
+    _assert_tensor_close(_find_bn(bn1).running_mean.data(ctx_list[0]),
+                         _find_bn(bn2).running_mean.data(ctx_list[0]))
+    _assert_tensor_close(_find_bn(bn1).running_var.data(ctx_list[0]),
+                         _find_bn(bn2).running_var.data(ctx_list[0]))
+    # check that the input gradients match
+    input2grad = mx.nd.concat(*[xi.grad.as_in_context(input.context) for xi in inputs2], dim=0)
+    _assert_tensor_close(input1.grad, input2grad)
+
+def testSyncBN():
Review comment:
Is there a test for inference? The checks above all run inside autograd.record(), so they only cover training mode.
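For reference, an inference check could run both blocks outside autograd.record(), where BatchNorm and SyncBatchNorm normalize with the running statistics. A minimal sketch under that assumption (the helper name is hypothetical; it reuses split_and_load and an _assert_tensor_close like the one above):

    def _check_inference(bn1, bn2, input, ctx_list):
        # outside autograd.record(), normalization uses the running
        # mean/variance, so single- and multi-device outputs should match
        output1 = bn1(input.as_in_context(ctx_list[0]))
        inputs2 = split_and_load(input.copy(), ctx_list, batch_axis=0)
        output2 = mx.nd.concat(
            *[bn2(xi).as_in_context(input.context) for xi in inputs2], dim=0)
        _assert_tensor_close(output1, output2)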