This is an automated email from the ASF dual-hosted git repository.
patriczhao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.
from 99d5773 Update CustomOp doc with changes for GPU support (#17486)
add 065d48e Add bfloat16 floating-point format support based on AMP (#17265)
No new revisions were added by this update.
Summary of changes:
3rdparty/dlpack | 2 +-
3rdparty/mshadow/mshadow/base.h | 161 ++++++++-
3rdparty/mshadow/mshadow/bfloat.h | 186 +++++++++++
example/quantization/imagenet_inference.py | 63 +++-
include/mxnet/ndarray.h | 6 +
include/mxnet/tensor_blob.h | 6 +
plugin/caffe/caffe_data_iter.cc | 4 +-
plugin/caffe/caffe_loss.cc | 3 +
plugin/caffe/caffe_loss.cu | 3 +
plugin/caffe/caffe_op.cc | 3 +
plugin/caffe/caffe_op.cu | 3 +
python/mxnet/contrib/amp/amp.py | 252 +++++++++-----
python/mxnet/contrib/amp/lists/__init__.py | 3 +-
.../amp/lists/{symbol.py => symbol_bf16.py} | 69 ++--
.../amp/lists/{symbol.py => symbol_fp16.py} | 0
python/mxnet/executor.py | 17 +-
python/mxnet/gluon/parameter.py | 17 +-
python/mxnet/ndarray/ndarray.py | 8 +-
python/mxnet/ndarray/register.py | 13 +-
python/mxnet/symbol/register.py | 14 +-
python/mxnet/symbol/symbol.py | 6 +-
python/mxnet/test_utils.py | 91 +++---
src/c_api/c_api_symbolic.cc | 2 +
src/common/utils.h | 5 +
src/engine/naive_engine.cc | 13 +-
src/engine/threaded_engine.cc | 6 +-
src/engine/threaded_engine.h | 6 +-
src/executor/graph_executor.cc | 3 +-
src/executor/graph_executor.h | 2 -
src/imperative/imperative_utils.h | 18 +-
src/io/image_iter_common.h | 1 +
src/ndarray/ndarray.cc | 20 +-
src/nnvm/amp_infer_unknown.cc | 6 +-
src/nnvm/low_precision_pass.cc | 194 +++++++++--
src/nnvm/plan_memory.cc | 1 +
src/operator/mxnet_op.h | 3 +
src/operator/nn/batch_norm.cc | 10 +-
src/operator/nn/concat.cc | 2 +-
src/operator/nn/fully_connected.cc | 3 +-
src/operator/nn/mkldnn/mkldnn_act.cc | 4 +-
src/operator/nn/mkldnn/mkldnn_base-inl.h | 50 ++-
src/operator/nn/mkldnn/mkldnn_base.cc | 57 +++-
src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h | 24 +-
src/operator/nn/mkldnn/mkldnn_deconvolution.cc | 3 +-
src/operator/nn/mkldnn/mkldnn_fully_connected.cc | 36 +-
src/operator/nn/mkldnn/mkldnn_transpose.cc | 3 +-
src/operator/numpy/linalg/np_norm-inl.h | 56 ++--
src/operator/numpy/np_broadcast_reduce_op_value.cc | 169 +---------
src/operator/numpy/np_moments_op.cc | 199 ++++++++++++
src/operator/operator_common.h | 2 +
src/operator/operator_tune-inl.h | 2 +
src/operator/operator_tune.cc | 18 +-
src/operator/subgraph/mkldnn/mkldnn_conv.cc | 27 +-
.../subgraph/mkldnn/mkldnn_subgraph_base-inl.h | 4 +-
src/operator/tensor/amp_cast.cc | 121 ++++++-
src/operator/tensor/amp_cast.h | 12 +-
src/operator/tensor/elemwise_binary_op_basic.cc | 4 +-
tests/cpp/include/test_op.h | 6 +-
tests/python/gpu/test_contrib_amp.py | 26 +-
tests/python/mkl/test_bf16_operator.py | 290 +++++++++++++++++
tests/python/{gpu => mkl}/test_contrib_amp.py | 361 +++++++++++----------
tests/python/unittest/test_operator.py | 5 +-
62 files changed, 1996 insertions(+), 708 deletions(-)
create mode 100644 3rdparty/mshadow/mshadow/bfloat.h
copy python/mxnet/contrib/amp/lists/{symbol.py => symbol_bf16.py} (97%)
rename python/mxnet/contrib/amp/lists/{symbol.py => symbol_fp16.py} (100%)
create mode 100644 src/operator/numpy/np_moments_op.cc
create mode 100644 tests/python/mkl/test_bf16_operator.py
copy tests/python/{gpu => mkl}/test_contrib_amp.py (58%)