This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new fcbab28 [API] Add floor_divide (#20620)
fcbab28 is described below
commit fcbab288a8411ddcea471f6a4134b03c5f917878
Author: Zhenghui Jin <[email protected]>
AuthorDate: Fri Oct 29 10:45:15 2021 -0700
[API] Add floor_divide (#20620)
* [API] Add floor_divide
* fix lint
* fix sanity
* update implementation
* fix lint
* update operator_tune.cc
* fix
* fix lint
* fix build
* fix include
* fix rtc functions
* add amp list
* add floor_divide in GPU
* fix lint
* fix docstring
* Fix docstring
* fix lint
* update rtc
* fix rtc
---
docs/python_docs/python/api/np/routines.math.rst | 1 +
python/mxnet/amp/lists/symbol_fp16.py | 3 +
python/mxnet/ndarray/numpy/_op.py | 41 +++++++-
python/mxnet/numpy/multiarray.py | 63 +++++++++++-
python/mxnet/numpy_dispatch_protocol.py | 1 +
src/api/operator/numpy/np_elemwise_broadcast_op.cc | 9 ++
src/common/cuda/rtc/forward_functions-inl.h | 20 ++++
src/operator/mshadow_op.h | 112 +++++++++++++++++++++
.../numpy/np_elemwise_broadcast_op_scalar.cc | 9 ++
.../numpy/np_elemwise_broadcast_op_scalar.cu | 6 ++
src/operator/numpy/np_floor_divide.cc | 39 +++++++
src/operator/numpy/np_floor_divide.cu | 34 +++++++
src/operator/operator_tune.cc | 4 +
src/operator/tensor/elemwise_binary_broadcast_op.h | 2 +-
src/operator/tensor/elemwise_binary_op.h | 2 +-
.../python/unittest/test_numpy_interoperability.py | 12 +++
tests/python/unittest/test_numpy_op.py | 2 +
17 files changed, 356 insertions(+), 4 deletions(-)
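For readers skimming the diff: the change wires numpy-compatible floor division into the functional API, the // operator, and the CPU/GPU kernels. A quick hedged usage sketch (outputs follow from floor-division semantics, not from a recorded session):

    >>> from mxnet import np
    >>> np.floor_divide(np.array([1., 2., 3., 4.]), 2.5)
    array([0., 0., 1., 1.])
    >>> np.array([1., 2., 3., 4.]) // 2.5   # same kernel via ndarray.__floordiv__
    array([0., 0., 1., 1.])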
diff --git a/docs/python_docs/python/api/np/routines.math.rst b/docs/python_docs/python/api/np/routines.math.rst
index c909a56..83b3db1 100644
--- a/docs/python_docs/python/api/np/routines.math.rst
+++ b/docs/python_docs/python/api/np/routines.math.rst
@@ -157,6 +157,7 @@ Arithmetic operations
fmod
modf
divmod
+ floor_divide
Miscellaneous
diff --git a/python/mxnet/amp/lists/symbol_fp16.py b/python/mxnet/amp/lists/symbol_fp16.py
index 307336c..7e2f715 100644
--- a/python/mxnet/amp/lists/symbol_fp16.py
+++ b/python/mxnet/amp/lists/symbol_fp16.py
@@ -265,6 +265,9 @@ FP16_FP32_FUNCS = [
'_npi_multinomial',
'_npi_multiply',
'_npi_multiply_scalar',
+ '_npi_floor_divide',
+ '_npi_floor_divide_scalar',
+ '_npi_rfloor_divide_scalar',
'_npi_nan_to_num',
'_npi_negative',
'_npi_normal',
diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index ef1c6b7..4faa11d 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -51,7 +51,7 @@ __all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_li
            'diff', 'ediff1d', 'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
            'atleast_1d', 'atleast_2d', 'atleast_3d', 'fill_diagonal', 'squeeze',
            'where', 'bincount', 'rollaxis', 'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'diag', 'diagonal',
-           'positive', 'logaddexp']
+           'positive', 'logaddexp', 'floor_divide']
@set_module('mxnet.ndarray.numpy')
@@ -1170,6 +1170,45 @@ def true_divide(x1, x2, out=None):
@set_module('mxnet.ndarray.numpy')
@wrap_np_binary_func
+def floor_divide(x1, x2, out=None):
+    """Return the largest integer smaller than or equal to the division of the inputs.
+    It is equivalent to the Python // operator and pairs with the Python %
+    (remainder) function, so that a = a % b + b * (a // b) up to roundoff.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+    x2 : ndarray or scalar
+        Divisor array.
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+
+    .. note::
+
+       This operator now supports automatic type promotion. The resulting type
+       will be determined according to the following rules:
+
+       * If both inputs are of floating-point types, the output is the more precise type.
+       * If only one of the inputs is a floating-point type, the result is that type.
+       * If both inputs are of integer types (including boolean), the output is the more
+         precise type.
+
+    """
+    if isinstance(x1, numeric_types) and isinstance(x2, numeric_types):
+        return _np.floor_divide(x1, x2, out=out)
+    return _api_internal.floor_divide(x1, x2, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+@wrap_np_binary_func
def mod(x1, x2, out=None, **kwargs):
"""
Return element-wise remainder of division.
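The identity quoted in the new docstring, a = a % b + b * (a // b), pins down how floor division and remainder agree on signs. A small hedged check in plain NumPy (mxnet.numpy follows the same semantics):

    import numpy as np
    a = np.array([7, -7, 7, -7])
    b = np.array([3, 3, -3, -3])
    # floor division rounds toward negative infinity, unlike C-style truncation
    print(a // b)                             # [ 2 -3 -3  2]
    assert (a % b + b * (a // b) == a).all()  # the docstring identity holds exactly for ints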
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 427f8ff..1381165 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -81,7 +81,7 @@ __all__ = ['ndarray', 'empty', 'empty_like', 'array', 'shape', 'median',
            'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite', 'polyval', 'where', 'bincount',
            'atleast_1d', 'atleast_2d', 'atleast_3d', 'fill_diagonal', 'squeeze',
            'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'rollaxis', 'diag', 'diagonal',
-           'positive', 'logaddexp']
+           'positive', 'logaddexp', 'floor_divide']
__all__ += fallback.__all__
@@ -1114,6 +1114,23 @@ class ndarray(NDArray): # pylint: disable=invalid-name
"""x.__mul__(y) <=> x * y"""
return multiply(self, other)
+    @wrap_mxnp_np_ufunc
+    def __floordiv__(self, other):
+        """x.__floordiv__(y) <=> x // y"""
+        return floor_divide(self, other)
+
+    @wrap_mxnp_np_ufunc
+    def __ifloordiv__(self, other):
+        """x.__ifloordiv__(y) <=> x //= y"""
+        if not self.writable:
+            raise ValueError('trying to divide from a readonly ndarray')
+        return floor_divide(self, other, out=self)
+
+    @wrap_mxnp_np_ufunc
+    def __rfloordiv__(self, other):
+        """x.__rfloordiv__(y) <=> y // x"""
+        return floor_divide(other, self)
+
def __neg__(self):
"""x.__neg__() <=> -x"""
return negative(self)
@@ -3435,6 +3452,50 @@ def true_divide(x1, x2, out=None):
@set_module('mxnet.numpy')
@wrap_np_binary_func
+def floor_divide(x1, x2, out=None):
+    """Return the largest integer smaller than or equal to the division of the inputs.
+
+    It is equivalent to the Python // operator and pairs with the Python %
+    (remainder) function, so that a = a % b + b * (a // b) up to roundoff.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+    x2 : ndarray or scalar
+        Divisor array.
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+
+    .. note::
+
+       This operator now supports automatic type promotion. The resulting type
+       will be determined according to the following rules:
+
+       * If both inputs are of floating-point types, the output is the more precise type.
+       * If only one of the inputs is a floating-point type, the result is that type.
+       * If both inputs are of integer types (including boolean), the output is the more
+         precise type.
+
+    Examples
+    --------
+    >>> np.floor_divide(7, 3)
+    2
+    >>> np.floor_divide([1., 2., 3., 4.], 2.5)
+    array([0., 0., 1., 1.])
+    """
+    return _mx_nd_np.floor_divide(x1, x2, out=out)
+
+
+@set_module('mxnet.numpy')
+@wrap_np_binary_func
def mod(x1, x2, out=None, **kwargs):
"""
Return element-wise remainder of division.
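The three dunder methods added above map Python's operator forms onto the new primitive; a hedged sketch of what each enables (values follow from floor semantics):

    >>> from mxnet import np
    >>> x = np.array([5., 9., 13.])
    >>> x // 4        # __floordiv__
    array([1., 2., 3.])
    >>> 100 // x      # __rfloordiv__: scalar dividend on the left
    array([20., 11.,  7.])
    >>> x //= 4       # __ifloordiv__ stores the result back into x
    >>> x
    array([1., 2., 3.])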
diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py
index c293621..ac86019 100644
--- a/python/mxnet/numpy_dispatch_protocol.py
+++ b/python/mxnet/numpy_dispatch_protocol.py
@@ -254,6 +254,7 @@ _NUMPY_ARRAY_UFUNC_LIST = [
'logaddexp',
'subtract',
'multiply',
+ 'floor_divide',
'true_divide',
'negative',
'power',
diff --git a/src/api/operator/numpy/np_elemwise_broadcast_op.cc b/src/api/operator/numpy/np_elemwise_broadcast_op.cc
index b9f1060..067d419 100644
--- a/src/api/operator/numpy/np_elemwise_broadcast_op.cc
+++ b/src/api/operator/numpy/np_elemwise_broadcast_op.cc
@@ -61,6 +61,15 @@ MXNET_REGISTER_API("_npi.true_divide")
UFuncHelper(args, ret, op, op_scalar, op_rscalar);
});
+MXNET_REGISTER_API("_npi.floor_divide")
+ .set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
+ using namespace runtime;
+ const nnvm::Op* op = Op::Get("_npi_floor_divide");
+ const nnvm::Op* op_scalar = Op::Get("_npi_floor_divide_scalar");
+ const nnvm::Op* op_rscalar = Op::Get("_npi_rfloor_divide_scalar");
+ UFuncHelper(args, ret, op, op_scalar, op_rscalar);
+ });
+
MXNET_REGISTER_API("_npi.mod").set_body([](runtime::MXNetArgs args,
runtime::MXNetRetValue* ret) {
using namespace runtime;
const nnvm::Op* op = Op::Get("_npi_mod");
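UFuncHelper selects among the three registered ops by argument kind. Roughly, in a conceptual Python-level sketch (the helper names below are illustrative, not the C++ API):

    # hypothetical view of the dispatch performed by UFuncHelper
    def dispatch_floor_divide(x1, x2):
        if is_tensor(x1) and is_tensor(x2):
            return invoke('_npi_floor_divide', x1, x2)          # tensor // tensor
        elif is_tensor(x1):
            return invoke('_npi_floor_divide_scalar', x1, x2)   # tensor // scalar
        else:
            return invoke('_npi_rfloor_divide_scalar', x2, x1)  # scalar // tensor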
diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index 333ae04..2b45709 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -261,6 +261,26 @@ rsub(const DType a, const DType2 b) {
template <typename DType, typename DType2>
__device__ inline mixed_type<DType, DType2>
+floor_divide(const DType a, const DType2 b) {
+ if (type_util::has_double_or_integral<DType, DType2>::value) {
+ return ::floor((double)a / (double)b);
+ } else {
+ return ::floorf((float)a / (float)b);
+ }
+}
+
+template <typename DType, typename DType2>
+__device__ inline mixed_type<DType, DType2>
+rfloor_divide(const DType a, const DType2 b) {
+ if (type_util::has_double_or_integral<DType, DType2>::value) {
+ return ::floor((double)b / (double)a);
+ } else {
+ return ::floorf((float)b / (float)a);
+ }
+}
+
+template <typename DType, typename DType2>
+__device__ inline mixed_type<DType, DType2>
mul(const DType a, const DType2 b) {
return a * b;
}
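Both RTC helpers pick the computation precision from the operand types: double when either side is double or integral, float otherwise. The same rule in a hedged NumPy sketch (the function name is illustrative):

    import numpy as np

    def floor_divide_like_rtc(a, b):
        a, b = np.asarray(a), np.asarray(b)
        needs_double = any(np.issubdtype(x.dtype, np.integer) or x.dtype == np.float64
                           for x in (a, b))
        dt = np.float64 if needs_double else np.float32
        return np.floor(a.astype(dt) / b.astype(dt))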
diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 677d924..34f852d 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -231,6 +231,118 @@ struct rtrue_divide : public mxnet_op::tunable {
}
};
+/***** floor_divide ******/
+
+struct floor_divide : public mxnet_op::tunable {
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_same<DType, bool>::value && std::is_integral<DType>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    DType c = static_cast<DType>(::floor(a / b));
+    if ((c * b != a) && ((a < 0) != (b < 0))) {
+      return DType(c - 1);
+    } else {
+      return c;
+    }
+  }
+
+  MSHADOW_XINLINE static bool Map(bool a, bool b) {
+    return static_cast<bool>(::floor(a / b));
+  }
+
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_integral<DType>::value && !std::is_same<DType, float>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    return ::floor(a / b);
+  }
+
+  MSHADOW_XINLINE static float Map(float a, float b) {
+    return ::floorf(a / b);
+  }
+};
+
+struct rfloor_divide : public mxnet_op::tunable {
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_same<DType, bool>::value && std::is_integral<DType>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    DType c = static_cast<DType>(::floor(b / a));
+    if ((c * a != b) && ((a < 0) != (b < 0))) {
+      return DType(c - 1);
+    } else {
+      return c;
+    }
+  }
+
+  MSHADOW_XINLINE static bool Map(bool a, bool b) {
+    return static_cast<bool>(::floor(b / a));
+  }
+
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_integral<DType>::value && !std::is_same<DType, float>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    return ::floor(b / a);
+  }
+
+  MSHADOW_XINLINE static float Map(float a, float b) {
+    return ::floorf(b / a);
+  }
+};
+
+struct mixed_floor_divide {
+  template <typename DType, typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return ::floor(static_cast<mshadow::half::half_t>(a) / b);
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return ::floorf(static_cast<float>(a) / b);
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_same<DType, float>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return ::floor(static_cast<double>(a) / b);
+  }
+};
+
+struct mixed_rfloor_divide {
+  template <typename DType, typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return ::floor(b / static_cast<mshadow::half::half_t>(a));
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return ::floorf(b / static_cast<float>(a));
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_same<DType, float>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return ::floor(b / static_cast<double>(a));
+  }
+};
+
MXNET_BINARY_MATH_OP_NC(left, a);
MXNET_BINARY_MATH_OP_NC(right, b);
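For integral types the functor cannot simply floor a floating quotient (large integers lose precision in the cast), so it truncates with integer division and then steps down by one when the remainder is nonzero and the operands have opposite signs. The same trick in a hedged Python sketch (Python's // on non-negative magnitudes matches C++ truncating division):

    def int_floor_divide(a, b):
        q = abs(a) // abs(b)                   # magnitude of the truncated quotient
        c = q if (a < 0) == (b < 0) else -q    # truncate toward zero, like C++ int division
        if c * b != a and (a < 0) != (b < 0):  # nonzero remainder with opposite signs
            c -= 1                             # step down from truncation to the floor
        return c

    assert int_floor_divide(7, 3) == 2 == 7 // 3
    assert int_floor_divide(-7, 3) == -3 == -7 // 3
    assert int_floor_divide(7, -3) == -3 == 7 // -3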
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
index c0d6b40..4fd1f2c 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
@@ -61,5 +61,14 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar)
.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu,
mshadow_op::rpower>)
.set_attr<nnvm::FGradient>("FGradient",
ElemwiseGradUseOut{"_backward_rpower_scalar"});
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_floor_divide_scalar)
+ .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu,
op::mshadow_op::floor_divide>)
+ .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rfloor_divide_scalar)
+ .set_attr<FCompute>("FCompute<cpu>",
+                        BinaryScalarOp::Compute<cpu, op::mshadow_op::rfloor_divide>)
+ .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
+
} // namespace op
} // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
index 024d02a..c7bbeef 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
@@ -51,5 +51,11 @@ NNVM_REGISTER_OP(_npi_power_scalar)
NNVM_REGISTER_OP(_npi_rpower_scalar)
.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rpow"});
+NNVM_REGISTER_OP(_npi_floor_divide_scalar)
+ .set_attr<FCompute>("FCompute<gpu>",
BinaryScalarRTCCompute{"floor_divide"});
+
+NNVM_REGISTER_OP(_npi_rfloor_divide_scalar)
+ .set_attr<FCompute>("FCompute<gpu>",
BinaryScalarRTCCompute{"rfloor_divide"});
+
} // namespace op
} // namespace mxnet
diff --git a/src/operator/numpy/np_floor_divide.cc b/src/operator/numpy/np_floor_divide.cc
new file mode 100644
index 0000000..78f6cf5
--- /dev/null
+++ b/src/operator/numpy/np_floor_divide.cc
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_floor_divide.cc
+ * \brief CPU Implementation of floor_divide operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_floor_divide)
+    .set_attr<FCompute>("FCompute<cpu>",
+                        NumpyBinaryBroadcastComputeWithBool<cpu,
+                                                            op::mshadow_op::floor_divide,
+                                                            op::mshadow_op::mixed_floor_divide,
+                                                            op::mshadow_op::mixed_rfloor_divide>)
+    .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
+
+} // namespace op
+} // namespace mxnet
diff --git a/src/operator/numpy/np_floor_divide.cu b/src/operator/numpy/np_floor_divide.cu
new file mode 100644
index 0000000..54fbd9d
--- /dev/null
+++ b/src/operator/numpy/np_floor_divide.cu
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_floor_divide.cu
+ * \brief GPU Implementation of floor_divide operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_floor_divide)
+ .set_attr<FCompute>("FCompute<gpu>",
BinaryBroadcastRTCCompute{"floor_divide"});
+
+} // namespace op
+} // namespace mxnet
diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc
index 02cf907..d36a881 100644
--- a/src/operator/operator_tune.cc
+++ b/src/operator/operator_tune.cc
@@ -362,17 +362,21 @@ IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::plus);
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::minus);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::mul);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::div);  // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::floor_divide);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::true_divide);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minus_sign);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rminus);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rdiv);  // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rfloor_divide);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::plus);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::mul);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div);  // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::floor_divide);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus_sign);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rminus);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rdiv);  // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rfloor_divide);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rtrue_divide);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::div_grad);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div_grad);  // NOLINT()
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index 9bfcbc7..ef7bb83 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -321,7 +321,7 @@ void BinaryBroadcastComputeWithBool(const nnvm::NodeAttrs& attrs,
if (req[0] == kNullOp)
return;
mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
- MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[0].type_flag_, DType, {
+ MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(ndim, NDim, {
mshadow::Shape<NDim> oshape = new_oshape.get<NDim>();
mshadow::Shape<NDim> lstride =
mxnet_op::calc_stride(new_lshape.get<NDim>());
diff --git a/src/operator/tensor/elemwise_binary_op.h b/src/operator/tensor/elemwise_binary_op.h
index aa350b8..8339f20 100644
--- a/src/operator/tensor/elemwise_binary_op.h
+++ b/src/operator/tensor/elemwise_binary_op.h
@@ -560,7 +560,7 @@ class ElemwiseBinaryOp : public OpBase {
CHECK_EQ(inputs.size(), 2U);
CHECK_EQ(outputs.size(), 1U);
MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
- MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[0].type_flag_, DType, {
+ MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(outputs[0].type_flag_, DType, {
const size_t size = (minthree(outputs[0].Size(), inputs[0].Size(), inputs[1].Size()) +
                     DataType<DType>::kLanes - 1) /
                    DataType<DType>::kLanes;
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 09deace..c8edad6 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -1586,6 +1586,17 @@ def _add_workload_fmod(array_pool):
OpArgMngr.add_workload('fmod', array_pool['4x1'], array_pool['1x1x0'])
+def _add_workload_floor_divide(array_pool):
+    OpArgMngr.add_workload('floor_divide', array_pool['4x1'], array_pool['1x2'])
+    OpArgMngr.add_workload('floor_divide', array_pool['4x1'], 2)
+    OpArgMngr.add_workload('floor_divide', 2, array_pool['4x1'])
+    OpArgMngr.add_workload('floor_divide', array_pool['4x1'], array_pool['1x1x0'])
+    OpArgMngr.add_workload('floor_divide', np.array([-1, -2, -3], np.float32), 1.9999)
+    OpArgMngr.add_workload('floor_divide', np.array([1000, -200, -3], np.int64), 3)
+    OpArgMngr.add_workload('floor_divide', np.array([1, -2, -3, 4, -5], np.int32), 2.0001)
+    OpArgMngr.add_workload('floor_divide', np.array([1, -50, -0.2, 40000, 0], np.float64), -7)
+
+
def _add_workload_remainder():
# test remainder basic
OpArgMngr.add_workload('remainder', np.array([0, 1, 2, 4, 2],
dtype=np.float16),
@@ -3095,6 +3106,7 @@ def _prepare_workloads():
_add_workload_power(array_pool)
_add_workload_mod(array_pool)
_add_workload_fmod(array_pool)
+ _add_workload_floor_divide(array_pool)
_add_workload_remainder()
_add_workload_maximum(array_pool)
_add_workload_fmax(array_pool)
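The added floor_divide workloads lean on negative operands and mixed dtypes, where flooring differs most from C-style truncation; for instance, with plain NumPy (imported here as onp for illustration):

    >>> import numpy as onp
    >>> onp.floor_divide(onp.array([1000, -200, -3], onp.int64), 3)
    array([333, -67,  -1])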
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index b7d2c86..bf32c69 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -3070,6 +3070,8 @@ def test_np_binary_funcs():
[lambda y, x1, x2: onp.broadcast_to(x1, y.shape)]),
'divide': (0.1, 1.0, [lambda y, x1, x2: onp.ones(y.shape) / x2],
[lambda y, x1, x2: -x1 / (x2 * x2)]),
+        'floor_divide': (0.1, 1.0, [lambda y, x1, x2: onp.zeros(y.shape)],
+                         [lambda y, x1, x2: onp.zeros(y.shape)]),
'mod': (1.0, 10.0,
[lambda y, x1, x2: onp.ones(y.shape),
lambda y, x1, x2: onp.zeros(y.shape)],