This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


    from bbc39fa  add signal handler for fpe, bus error (#18956)
     add 29d6f27  Use RTC for elementwise and broadcast ops (#18622)

No new revisions were added by this update.

Summary of changes:
 3rdparty/mshadow/mshadow/base.h                    |  48 --
 3rdparty/mshadow/mshadow/half2.h                   | 162 ----
 CMakeLists.txt                                     |  16 +-
 ci/build_windows.py                                |   6 -
 ci/docker/runtime_functions.sh                     |  22 -
 ci/jenkins/Jenkins_steps.groovy                    |  14 -
 ci/jenkins/Jenkinsfile_unix_gpu                    |   1 -
 config/darwin.cmake                                |   1 -
 config/linux.cmake                                 |   1 -
 config/linux_gpu.cmake                             |   1 -
 docs/python_docs/python/tutorials/extend/index.rst |   7 +
 .../src/pages/api/faq/add_op_in_backend.md         |   1 +
 docs/static_site/src/pages/api/faq/env_var.md      |   6 +-
 docs/static_site/src/pages/api/faq/using_rtc.md    | 465 +++++++++++
 include/mxnet/libinfo.h                            |   5 -
 include/mxnet/rtc.h                                |   4 +-
 python/mxnet/contrib/amp/lists/symbol_fp16.py      |   3 -
 python/mxnet/runtime.py                            |   2 +-
 src/c_api/c_api.cc                                 |  20 +-
 src/common/cuda/rtc.cc                             | 244 ++++++
 src/common/cuda/rtc.h                              |  92 +++
 src/common/cuda/rtc/backward_functions-inl.h       | 480 +++++++++++
 src/common/cuda/rtc/forward_functions-inl.h        | 917 +++++++++++++++++++++
 src/common/cuda/rtc/half-inl.h                     |  84 ++
 src/common/cuda/rtc/reducer-inl.h                  | 109 +++
 .../cuda/rtc}/special_functions-inl.h              | 112 ++-
 src/common/cuda/rtc/util-inl.h                     | 389 +++++++++
 src/common/cuda/rtc/vectorization-inl.h            | 463 +++++++++++
 src/common/{cuda_utils.cc => cuda/utils.cc}        |   2 +-
 src/common/{cuda_utils.h => cuda/utils.h}          |   6 +-
 src/common/rtc.cc                                  |   6 +-
 src/common/utils.cc                                |  25 +
 src/common/utils.h                                 |  14 +
 src/engine/stream_manager.h                        |   2 +-
 src/engine/threaded_engine.cc                      |   2 +-
 src/engine/threaded_engine_pooled.cc               |   2 +-
 src/imperative/cached_op.h                         |   4 +-
 src/imperative/pointwise_fusion_pass.cc            |   6 +-
 src/kvstore/kvstore_nccl.h                         |   2 +-
 src/libinfo.cc                                     |   2 -
 src/ndarray/ndarray_function.cu                    |   2 +-
 src/operator/bilinear_sampler.cu                   |   2 +-
 src/operator/contrib/deformable_psroi_pooling.cu   |   2 +-
 src/operator/contrib/gradient_multiplier_op.cu     |   4 +-
 src/operator/contrib/nn/deformable_im2col.cuh      |   2 +-
 .../contrib/nn/modulated_deformable_im2col.cuh     |   2 +-
 src/operator/contrib/psroi_pooling.cu              |   2 +-
 src/operator/contrib/stes_op.cu                    |   8 +-
 src/operator/contrib/transformer.cu                |   2 +-
 src/operator/fusion/fused_op-inl.h                 | 872 +-------------------
 src/operator/fusion/fused_op.cc                    |   4 +-
 src/operator/fusion/fused_op.cu                    | 148 +---
 src/operator/fusion/fused_op.h                     |   5 +-
 src/operator/leaky_relu-inl.h                      |  18 +
 src/operator/linalg_impl.h                         |   2 +-
 src/operator/mshadow_op.h                          |  69 --
 src/operator/mxnet_op.h                            |   2 +-
 src/operator/nn/batch_norm.cu                      |   3 +-
 src/operator/nn/cudnn/cudnn_activation-inl.h       |   2 +-
 src/operator/nn/cudnn/cudnn_algoreg-inl.h          |   2 +-
 src/operator/nn/cudnn/cudnn_convolution-inl.h      |   2 +-
 src/operator/nn/cudnn/cudnn_deconvolution-inl.h    |   2 +-
 src/operator/nn/depthwise_convolution-inl.h        |   2 +-
 src/operator/nn/depthwise_convolution_tf.cuh       |   2 +-
 src/operator/nn/group_norm-inl.h                   |  93 ++-
 src/operator/nn/layer_norm-inl.h                   |  87 +-
 src/operator/nn/pool.cuh                           |   2 +-
 src/operator/nn/softmax-inl.h                      |   2 +-
 .../linalg/broadcast_reduce_customized-inl.cuh     |  21 +-
 .../numpy/linalg/broadcast_reduce_customized-inl.h |   4 +
 .../numpy/linalg/broadcast_reduce_op_customized.h  |   4 +-
 src/operator/numpy/linalg/np_matrix_rank-inl.h     |  16 +-
 src/operator/numpy/linalg/np_pinv-inl.h            |  30 +-
 src/operator/numpy/np_broadcast_reduce_op.h        |  25 +-
 src/operator/numpy/np_cross-inl.h                  |  44 +-
 src/operator/numpy/np_diff-inl.h                   |   4 +-
 .../numpy/np_elemwise_broadcast_logic_op.cu        |   4 +-
 src/operator/numpy/np_elemwise_broadcast_op.cu     |  56 +-
 src/operator/numpy/np_elemwise_broadcast_op.h      |  10 +-
 .../numpy/np_elemwise_broadcast_op_extended.cc     |   6 +-
 .../numpy/np_elemwise_broadcast_op_extended.cu     |  67 +-
 .../numpy/np_elemwise_broadcast_op_extended_sec.cu |  33 +-
 src/operator/numpy/np_elemwise_unary_op_basic.cc   |   2 +-
 src/operator/numpy/np_elemwise_unary_op_basic.cu   | 171 ++--
 src/operator/numpy/np_polynomial_op.cu             |   2 +-
 src/operator/numpy/np_true_divide.cu               |   3 +-
 src/operator/numpy/np_where_op-inl.h               |  12 +-
 src/operator/numpy/random/np_exponential_op.h      |   2 +-
 src/operator/numpy/random/np_gamma_op.h            |   2 +-
 src/operator/numpy/random/np_location_scale_op.h   |  10 +-
 src/operator/numpy/random/np_normal_op.h           |  10 +-
 src/operator/numpy/random/np_pareto_op.h           |   2 +-
 src/operator/numpy/random/np_rayleigh_op.h         |   2 +-
 src/operator/numpy/random/np_weibull_op.h          |   2 +-
 src/operator/operator_common.h                     |   2 +-
 src/operator/operator_tune.cc                      |   2 -
 src/operator/pad.cu                                |   2 +-
 src/operator/quantization/quantization_utils.h     |   2 +-
 src/operator/random/pdf_op.h                       |   4 +-
 src/operator/tensor/broadcast_reduce-inl.cuh       | 338 +-------
 src/operator/tensor/broadcast_reduce-inl.h         | 580 ++++++++++---
 src/operator/tensor/broadcast_reduce_op.h          |  12 +-
 src/operator/tensor/cast_storage-inl.h             |   2 +-
 .../tensor/elemwise_binary_broadcast_op-inl.cuh    |  82 --
 .../tensor/elemwise_binary_broadcast_op.cc         | 452 ++++++++++
 src/operator/tensor/elemwise_binary_broadcast_op.h | 231 +-----
 .../tensor/elemwise_binary_broadcast_op_basic.cu   |  25 +-
 .../elemwise_binary_broadcast_op_extended.cu       |  21 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cu   |  18 +-
 src/operator/tensor/elemwise_binary_op.cc          | 351 +++++++-
 src/operator/tensor/elemwise_binary_op.h           | 192 +++--
 src/operator/tensor/elemwise_binary_op_basic.cu    |  28 +-
 src/operator/tensor/elemwise_binary_op_extended.cu |  22 +-
 src/operator/tensor/elemwise_binary_op_logic.cu    |  19 +-
 src/operator/tensor/elemwise_binary_scalar_op.cc   | 257 ++++++
 src/operator/tensor/elemwise_binary_scalar_op.h    |  91 +-
 .../tensor/elemwise_binary_scalar_op_basic.cu      |  37 +-
 .../tensor/elemwise_binary_scalar_op_extended.cu   |  29 +-
 .../tensor/elemwise_binary_scalar_op_logic.cu      |  30 +-
 src/operator/tensor/elemwise_scatter_op.cc         | 142 ----
 src/operator/tensor/elemwise_scatter_op.cu         |  47 --
 src/operator/tensor/elemwise_scatter_op.h          | 318 -------
 src/operator/tensor/elemwise_sum.cu                | 130 ++-
 src/operator/tensor/elemwise_sum.h                 |  12 -
 src/operator/tensor/elemwise_unary_op.cc           | 163 ++++
 src/operator/tensor/elemwise_unary_op.h            | 279 ++++---
 src/operator/tensor/elemwise_unary_op_basic.cc     |   4 +-
 src/operator/tensor/elemwise_unary_op_basic.cu     |  85 +-
 src/operator/tensor/elemwise_unary_op_logexp.cu    |  31 +-
 src/operator/tensor/elemwise_unary_op_pow.cu       |  35 +-
 src/operator/tensor/elemwise_unary_op_trig.cu      |  91 +-
 src/operator/tensor/pseudo2DTranspose_op-inl.cuh   |   2 +-
 src/operator/tensor/reduce_rtc.cc                  | 524 ++++++++++++
 src/profiler/profiler.cc                           |   2 +-
 src/profiler/storage_profiler.cc                   |   2 +-
 src/resource.cc                                    |   2 +-
 src/storage/storage_manager_helpers.h              |   2 +-
 tests/python/gpu/test_fusion.py                    |   1 +
 tests/python/unittest/test_numpy_op.py             |  81 +-
 tests/python/unittest/test_operator.py             |  72 ++
 tests/python/unittest/test_sparse_operator.py      | 146 ----
 141 files changed, 7027 insertions(+), 3610 deletions(-)
 delete mode 100644 3rdparty/mshadow/mshadow/half2.h
 create mode 100644 docs/static_site/src/pages/api/faq/using_rtc.md
 create mode 100644 src/common/cuda/rtc.cc
 create mode 100644 src/common/cuda/rtc.h
 create mode 100644 src/common/cuda/rtc/backward_functions-inl.h
 create mode 100644 src/common/cuda/rtc/forward_functions-inl.h
 create mode 100644 src/common/cuda/rtc/half-inl.h
 create mode 100644 src/common/cuda/rtc/reducer-inl.h
 copy src/{operator => common/cuda/rtc}/special_functions-inl.h (83%)
 create mode 100644 src/common/cuda/rtc/util-inl.h
 create mode 100644 src/common/cuda/rtc/vectorization-inl.h
 rename src/common/{cuda_utils.cc => cuda/utils.cc} (99%)
 rename src/common/{cuda_utils.h => cuda/utils.h} (99%)
 delete mode 100644 src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh
 create mode 100644 src/operator/tensor/elemwise_binary_broadcast_op.cc
 create mode 100644 src/operator/tensor/elemwise_binary_scalar_op.cc
 delete mode 100644 src/operator/tensor/elemwise_scatter_op.cc
 delete mode 100644 src/operator/tensor/elemwise_scatter_op.cu
 delete mode 100644 src/operator/tensor/elemwise_scatter_op.h
 create mode 100644 src/operator/tensor/elemwise_unary_op.cc
 create mode 100644 src/operator/tensor/reduce_rtc.cc

Reply via email to