This is an automated email from the ASF dual-hosted git repository.
dickjc123 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.
from f882de0 [Numpy] add cross product op (#17637)
add 5542d03 Fix and optimize handling of vectorized memory accesses (#17767)
No new revisions were added by this update.
Summary of changes:
3rdparty/mshadow/mshadow/base.h | 48 ---
3rdparty/mshadow/mshadow/half2.h | 143 ---------
src/common/cuda_vectorization.cuh | 283 ++++++++++++++++++
src/operator/mshadow_op.h | 67 -----
src/operator/tensor/elemwise_binary_op.cuh | 322 +++++++++++++++++++++
src/operator/tensor/elemwise_binary_op.h | 206 +++++++------
src/operator/tensor/elemwise_binary_op_basic.cu | 23 +-
src/operator/tensor/elemwise_binary_scalar_op.cuh | 207 +++++++++++++
src/operator/tensor/elemwise_binary_scalar_op.h | 75 ++++-
.../tensor/elemwise_binary_scalar_op_basic.cu | 9 +-
.../tensor/elemwise_binary_scalar_op_extended.cu | 15 +-
src/operator/tensor/elemwise_sum.cu | 112 ++++++-
src/operator/tensor/elemwise_sum.h | 12 -
src/operator/tensor/elemwise_unary_op.cuh | 127 ++++++++
src/operator/tensor/elemwise_unary_op.h | 56 ++--
src/operator/tensor/elemwise_unary_op_basic.cu | 1 +
src/operator/tensor/elemwise_unary_op_pow.cu | 1 +
src/operator/tensor/elemwise_unary_op_trig.cu | 1 +
tests/python/unittest/test_operator.py | 78 +++++
19 files changed, 1342 insertions(+), 444 deletions(-)
delete mode 100755 3rdparty/mshadow/mshadow/half2.h
create mode 100644 src/common/cuda_vectorization.cuh
create mode 100644 src/operator/tensor/elemwise_binary_op.cuh
create mode 100644 src/operator/tensor/elemwise_binary_scalar_op.cuh
create mode 100644 src/operator/tensor/elemwise_unary_op.cuh