This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new fcbab28  [API] Add floor_divide (#20620)
fcbab28 is described below

commit fcbab288a8411ddcea471f6a4134b03c5f917878
Author: Zhenghui Jin <[email protected]>
AuthorDate: Fri Oct 29 10:45:15 2021 -0700

    [API] Add floor_divide (#20620)
    
    * [API] Add floor_divide
    
    * fix lint
    
    * fix sanity
    
    * update implementation
    
    * fix lint
    
    * update operator_tune.cc
    
    * fix
    
    * fix lint
    
    * fix build
    
    * fix include
    
    * fix rtc functions
    
    * add amp list
    
    * add floor_divide in GPU
    
    * fix lint
    
    * fix docstring
    
    * Fix docstring
    
    * fix lint
    
    * update rtc
    
    * fix rtc
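    
    For reference, a minimal usage sketch of the new operator (assumes an
    MXNet build that includes this commit, with NumPy semantics enabled via
    npx.set_np()):
    
        from mxnet import np, npx
        npx.set_np()  # enable NumPy-compatible array semantics
    
        a = np.array([7., -7., 9.])
        b = np.array([3., 3., -4.])
        print(np.floor_divide(a, b))  # [ 2. -3. -3.], the floor of a / b
        print(a // b)                 # __floordiv__ routes to the same operator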
---
 docs/python_docs/python/api/np/routines.math.rst   |   1 +
 python/mxnet/amp/lists/symbol_fp16.py              |   3 +
 python/mxnet/ndarray/numpy/_op.py                  |  41 +++++++-
 python/mxnet/numpy/multiarray.py                   |  63 +++++++++++-
 python/mxnet/numpy_dispatch_protocol.py            |   1 +
 src/api/operator/numpy/np_elemwise_broadcast_op.cc |   9 ++
 src/common/cuda/rtc/forward_functions-inl.h        |  20 ++++
 src/operator/mshadow_op.h                          | 112 +++++++++++++++++++++
 .../numpy/np_elemwise_broadcast_op_scalar.cc       |   9 ++
 .../numpy/np_elemwise_broadcast_op_scalar.cu       |   6 ++
 src/operator/numpy/np_floor_divide.cc              |  39 +++++++
 src/operator/numpy/np_floor_divide.cu              |  34 +++++++
 src/operator/operator_tune.cc                      |   4 +
 src/operator/tensor/elemwise_binary_broadcast_op.h |   2 +-
 src/operator/tensor/elemwise_binary_op.h           |   2 +-
 .../python/unittest/test_numpy_interoperability.py |  12 +++
 tests/python/unittest/test_numpy_op.py             |   2 +
 17 files changed, 356 insertions(+), 4 deletions(-)

diff --git a/docs/python_docs/python/api/np/routines.math.rst b/docs/python_docs/python/api/np/routines.math.rst
index c909a56..83b3db1 100644
--- a/docs/python_docs/python/api/np/routines.math.rst
+++ b/docs/python_docs/python/api/np/routines.math.rst
@@ -157,6 +157,7 @@ Arithmetic operations
    fmod
    modf
    divmod
+   floor_divide
 
 
 Miscellaneous
diff --git a/python/mxnet/amp/lists/symbol_fp16.py b/python/mxnet/amp/lists/symbol_fp16.py
index 307336c..7e2f715 100644
--- a/python/mxnet/amp/lists/symbol_fp16.py
+++ b/python/mxnet/amp/lists/symbol_fp16.py
@@ -265,6 +265,9 @@ FP16_FP32_FUNCS = [
     '_npi_multinomial',
     '_npi_multiply',
     '_npi_multiply_scalar',
+    '_npi_floor_divide',
+    '_npi_floor_divide_scalar',
+    '_npi_rfloor_divide_scalar',
     '_npi_nan_to_num',
     '_npi_negative',
     '_npi_normal',
diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index ef1c6b7..4faa11d 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -51,7 +51,7 @@ __all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_li
           'diff', 'ediff1d', 'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
           'atleast_1d', 'atleast_2d', 'atleast_3d', 'fill_diagonal', 'squeeze',
           'where', 'bincount', 'rollaxis', 'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'diag', 'diagonal',
-           'positive', 'logaddexp']
+           'positive', 'logaddexp', 'floor_divide']
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -1170,6 +1170,45 @@ def true_divide(x1, x2, out=None):
 
 @set_module('mxnet.ndarray.numpy')
 @wrap_np_binary_func
+def floor_divide(x1, x2, out=None):
+    """Return the largest integer smaller or equal to the division of the 
inputs.
+    It is equivalent to the Python // operator and pairs with the Python % 
(remainder),
+    function so that a = a % b + b * (a // b) up to roundoff.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+    x2 : ndarray or scalar
+        Divisor array.
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+
+    .. note::
+
+       This operator now supports automatic type promotion. The resulting type will be
+       determined according to the following rules:
+
+       * If both inputs are of floating number types, the output is the more precise type.
+       * If only one of the inputs is floating number type, the result is that type.
+       * If both inputs are of integer types (including boolean), the output is the more
+         precise type.
+
+    """
+    if isinstance(x1, numeric_types) and isinstance(x2, numeric_types):
+        return _np.floor_divide(x1, x2, out=out)
+    return _api_internal.floor_divide(x1, x2, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+@wrap_np_binary_func
 def mod(x1, x2, out=None, **kwargs):
     """
     Return element-wise remainder of division.
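
The frontend wrapper above sends pure-scalar inputs straight to official
NumPy and everything else to the registered C++ operator. A standalone
sketch of that dispatch pattern (_backend_floor_divide is a hypothetical
stand-in for the real _api_internal.floor_divide binding):

    import numpy as onp
    from numbers import Number

    def _backend_floor_divide(x1, x2, out=None):
        # stand-in for _api_internal.floor_divide (the C++ operator)
        return onp.floor_divide(onp.asarray(x1), onp.asarray(x2), out=out)

    def floor_divide(x1, x2, out=None):
        # pure scalars never touch the backend: defer to NumPy directly
        if isinstance(x1, Number) and isinstance(x2, Number):
            return onp.floor_divide(x1, x2, out=out)
        return _backend_floor_divide(x1, x2, out=out)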
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 427f8ff..1381165 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -81,7 +81,7 @@ __all__ = ['ndarray', 'empty', 'empty_like', 'array', 'shape', 'median',
           'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite', 'polyval', 'where', 'bincount',
           'atleast_1d', 'atleast_2d', 'atleast_3d', 'fill_diagonal', 'squeeze',
           'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'rollaxis', 'diag', 'diagonal',
-           'positive', 'logaddexp']
+           'positive', 'logaddexp', 'floor_divide']
 
 __all__ += fallback.__all__
 
@@ -1114,6 +1114,23 @@ class ndarray(NDArray):  # pylint: disable=invalid-name
         """x.__mul__(y) <=> x * y"""
         return multiply(self, other)
 
+    @wrap_mxnp_np_ufunc
+    def __floordiv__(self, other):
+        """x.__floordiv__(y) <=> x // y"""
+        return floor_divide(self, other)
+
+    @wrap_mxnp_np_ufunc
+    def __ifloordiv__(self, other):
+        """x.__ifloordiv__(y) <=> x //= y"""
+        if not self.writable:
+            raise ValueError('trying to divide from a readonly ndarray')
+        return floor_divide(self, other, out=self)
+
+    @wrap_mxnp_np_ufunc
+    def __rfloordiv__(self, other):
+        """x.__rfloordiv__(y) <=> y // x"""
+        return floor_divide(other, self)
+
     def __neg__(self):
         """x.__neg__() <=> -x"""
         return negative(self)
@@ -3435,6 +3452,50 @@ def true_divide(x1, x2, out=None):
 
 @set_module('mxnet.numpy')
 @wrap_np_binary_func
+def floor_divide(x1, x2, out=None):
+    """Return the largest integer smaller or equal to the division of the 
inputs.
+
+    It is equivalent to the Python // operator and pairs with the Python % 
(remainder),
+    function so that a = a % b + b * (a // b) up to roundoff.
+
+    Parameters
+    ----------
+    x1 : ndarray or scalar
+        Dividend array.
+    x2 : ndarray or scalar
+        Divisor array.
+    out : ndarray
+        A location into which the result is stored. If provided, it must have a shape
+        that the inputs broadcast to. If not provided or None, a freshly-allocated array
+        is returned.
+
+    Returns
+    -------
+    out : ndarray or scalar
+        This is a scalar if both x1 and x2 are scalars.
+
+    .. note::
+
+       This operator now supports automatic type promotion. The resulting type will be
+       determined according to the following rules:
+
+       * If both inputs are of floating number types, the output is the more precise type.
+       * If only one of the inputs is floating number type, the result is that type.
+       * If both inputs are of integer types (including boolean), the output is the more
+         precise type.
+
+    Examples
+    --------
+    >>> np.floor_divide(7,3)
+    2
+    >>> np.floor_divide([1., 2., 3., 4.], 2.5)
+    array([ 0.,  0.,  1.,  1.])
+    """
+    return _mx_nd_np.floor_divide(x1, x2, out=out)
+
+
+@set_module('mxnet.numpy')
+@wrap_np_binary_func
 def mod(x1, x2, out=None, **kwargs):
     """
     Return element-wise remainder of division.
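
With __floordiv__, __ifloordiv__ and __rfloordiv__ wired up as above, the
operator also composes with plain Python syntax; a small sketch (same
npx.set_np() assumption as before):

    from mxnet import np, npx
    npx.set_np()

    x = np.array([5., -5., 7.])
    x //= 2.         # __ifloordiv__ writes the result back into x
    print(x)         # [ 2. -3.  3.]
    print(10. // x)  # __rfloordiv__: scalar dividend, ndarray divisor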
diff --git a/python/mxnet/numpy_dispatch_protocol.py 
b/python/mxnet/numpy_dispatch_protocol.py
index c293621..ac86019 100644
--- a/python/mxnet/numpy_dispatch_protocol.py
+++ b/python/mxnet/numpy_dispatch_protocol.py
@@ -254,6 +254,7 @@ _NUMPY_ARRAY_UFUNC_LIST = [
     'logaddexp',
     'subtract',
     'multiply',
+    'floor_divide',
     'true_divide',
     'negative',
     'power',
diff --git a/src/api/operator/numpy/np_elemwise_broadcast_op.cc b/src/api/operator/numpy/np_elemwise_broadcast_op.cc
index b9f1060..067d419 100644
--- a/src/api/operator/numpy/np_elemwise_broadcast_op.cc
+++ b/src/api/operator/numpy/np_elemwise_broadcast_op.cc
@@ -61,6 +61,15 @@ MXNET_REGISTER_API("_npi.true_divide")
       UFuncHelper(args, ret, op, op_scalar, op_rscalar);
     });
 
+MXNET_REGISTER_API("_npi.floor_divide")
+    .set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
+      using namespace runtime;
+      const nnvm::Op* op         = Op::Get("_npi_floor_divide");
+      const nnvm::Op* op_scalar  = Op::Get("_npi_floor_divide_scalar");
+      const nnvm::Op* op_rscalar = Op::Get("_npi_rfloor_divide_scalar");
+      UFuncHelper(args, ret, op, op_scalar, op_rscalar);
+    });
+
 MXNET_REGISTER_API("_npi.mod").set_body([](runtime::MXNetArgs args, 
runtime::MXNetRetValue* ret) {
   using namespace runtime;
   const nnvm::Op* op         = Op::Get("_npi_mod");
diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index 333ae04..2b45709 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -261,6 +261,26 @@ rsub(const DType a, const DType2 b) {
 
 template <typename DType, typename DType2>
 __device__ inline mixed_type<DType, DType2>
+floor_divide(const DType a, const DType2 b) {
+  if (type_util::has_double_or_integral<DType, DType2>::value) {
+    return ::floor((double)a / (double)b);
+  } else {
+    return ::floorf((float)a / (float)b);
+  }
+}
+
+template <typename DType, typename DType2>
+__device__ inline mixed_type<DType, DType2>
+rfloor_divide(const DType a, const DType2 b) {
+  if (type_util::has_double_or_integral<DType, DType2>::value) {
+    return ::floor((double)b / (double)a);
+  } else {
+    return ::floorf((float)b / (float)a);
+  }
+}
+
+template <typename DType, typename DType2>
+__device__ inline mixed_type<DType, DType2>
 mul(const DType a, const DType2 b) {
   return a * b;
 }
diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 677d924..34f852d 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -231,6 +231,118 @@ struct rtrue_divide : public mxnet_op::tunable {
   }
 };
 
+/***** floor_divide ******/
+
+struct floor_divide : public mxnet_op::tunable {
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_same<DType, bool>::value && std::is_integral<DType>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    DType c = static_cast<DType>(::floor(a / b));
+    if ((c * b != a) && ((a < 0) != (b < 0))) {
+      return DType(c - 1);
+    } else {
+      return c;
+    }
+  }
+
+  MSHADOW_XINLINE static bool Map(bool a, bool b) {
+    return static_cast<bool>(::floor(a / b));
+  }
+
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_integral<DType>::value && !std::is_same<DType, float>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    return ::floor(a / b);
+  }
+
+  MSHADOW_XINLINE static float Map(float a, float b) {
+    return ::floorf(a / b);
+  }
+};
+
+struct rfloor_divide : public mxnet_op::tunable {
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_same<DType, bool>::value && std::is_integral<DType>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    DType c = static_cast<DType>(::floor(b / a));
+    if ((c * a != b) && ((a < 0) != (b < 0))) {
+      return DType(c - 1);
+    } else {
+      return c;
+    }
+  }
+
+  MSHADOW_XINLINE static bool Map(bool a, bool b) {
+    return static_cast<bool>(::floor(b / a));
+  }
+
+  template <
+      typename DType,
+      typename std::enable_if<!std::is_integral<DType>::value && !std::is_same<DType, float>::value,
+                              int>::type = 0>
+  MSHADOW_XINLINE static DType Map(DType a, DType b) {
+    return ::floor(b / a);
+  }
+
+  MSHADOW_XINLINE static float Map(float a, float b) {
+    return ::floorf(b / a);
+  }
+};
+
+struct mixed_floor_divide {
+  template <typename DType, typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return ::floor(a / static_cast<mshadow::half::half_t>(b));
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return ::floorf(a / static_cast<float>(b));
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_same<DType, float>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return ::floor(a / static_cast<double>(b));
+  }
+};
+
+struct mixed_rfloor_divide {
+  template <typename DType, typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return ::floor(b / static_cast<mshadow::half::half_t>(a));
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return ::floorf(b / static_cast<float>(a));
+  }
+
+  template <typename DType,
+            typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                        std::is_same<DType, float>::value ||
+                                        std::is_integral<DType>::value,
+                                    int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return ::floor(b / static_cast<double>(a));
+  }
+};
+
 MXNET_BINARY_MATH_OP_NC(left, a);
 
 MXNET_BINARY_MATH_OP_NC(right, b);
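
For integral types the kernels above divide first (C++ integer division
truncates toward zero) and then step the quotient down by one when the
division was inexact and the operands have opposite signs, which is exactly
the correction needed to round toward negative infinity. A direct Python
transcription of that branch, checked against Python's own floor division:

    def int_floor_divide(a, b):
        # emulate C++ truncating division (rounds toward zero)
        c = abs(a) // abs(b)
        if (a < 0) != (b < 0):
            c = -c
        # inexact quotient with opposite signs: truncation rounded toward
        # zero, i.e. upward, so step down by one
        if c * b != a and (a < 0) != (b < 0):
            c -= 1
        return c

    assert all(int_floor_divide(a, b) == a // b
               for a in range(-20, 21) for b in (-7, -3, -1, 1, 2, 5))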
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
index c0d6b40..4fd1f2c 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
@@ -61,5 +61,14 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar)
 .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, 
mshadow_op::rpower>)
 .set_attr<nnvm::FGradient>("FGradient", 
ElemwiseGradUseOut{"_backward_rpower_scalar"});
 
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_floor_divide_scalar)
+    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, 
op::mshadow_op::floor_divide>)
+    .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rfloor_divide_scalar)
+    .set_attr<FCompute>("FCompute<cpu>",
+                        BinaryScalarOp::Compute<cpu, 
op::mshadow_op::rfloor_divide>)
+    .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
+
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
index 024d02a..c7bbeef 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
@@ -51,5 +51,11 @@ NNVM_REGISTER_OP(_npi_power_scalar)
 NNVM_REGISTER_OP(_npi_rpower_scalar)
 .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rpow"});
 
+NNVM_REGISTER_OP(_npi_floor_divide_scalar)
+    .set_attr<FCompute>("FCompute<gpu>", 
BinaryScalarRTCCompute{"floor_divide"});
+
+NNVM_REGISTER_OP(_npi_rfloor_divide_scalar)
+    .set_attr<FCompute>("FCompute<gpu>", 
BinaryScalarRTCCompute{"rfloor_divide"});
+
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/numpy/np_floor_divide.cc b/src/operator/numpy/np_floor_divide.cc
new file mode 100644
index 0000000..78f6cf5
--- /dev/null
+++ b/src/operator/numpy/np_floor_divide.cc
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_floor_divide.cc
+ * \brief CPU Implementation of floor_divide operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_floor_divide)
+    .set_attr<FCompute>("FCompute<cpu>",
+                        NumpyBinaryBroadcastComputeWithBool<cpu,
+                                                            
op::mshadow_op::floor_divide,
+                                                            
op::mshadow_op::mixed_floor_divide,
+                                                            
op::mshadow_op::mixed_rfloor_divide>)
+    .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
+
+}  // namespace op
+}  // namespace mxnet
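
The FGradient registration above uses MakeZeroGradNodes: floor_divide is
piecewise constant, so its gradient is zero almost everywhere. A sketch of
the observable behaviour under autograd (same build assumption as above):

    from mxnet import np, npx, autograd
    npx.set_np()

    x = np.array([5., 7., 9.])
    x.attach_grad()
    with autograd.record():
        y = (x // 2.).sum()
    y.backward()
    print(x.grad)  # [0. 0. 0.]: the operator registers a zero gradient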
diff --git a/src/operator/numpy/np_floor_divide.cu b/src/operator/numpy/np_floor_divide.cu
new file mode 100644
index 0000000..54fbd9d
--- /dev/null
+++ b/src/operator/numpy/np_floor_divide.cu
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_floor_divide.cu
+ * \brief GPU Implementation of floor_divide operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_floor_divide)
+    .set_attr<FCompute>("FCompute<gpu>", 
BinaryBroadcastRTCCompute{"floor_divide"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc
index 02cf907..d36a881 100644
--- a/src/operator/operator_tune.cc
+++ b/src/operator/operator_tune.cc
@@ -362,17 +362,21 @@ IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::plus);
 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::minus);             // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::mul);               // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::div);               // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::floor_divide);      // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::true_divide);                 // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minus_sign);                  // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rminus);                      // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rdiv);                        // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rfloor_divide);               // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::plus);                        // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus);                       // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::mul);                         // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div);                         // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::floor_divide);                // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus_sign);                  // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rminus);                      // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rdiv);                        // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rfloor_divide);               // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rtrue_divide);                // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::div_grad);                    // NOLINT()
 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div_grad);                    // NOLINT()
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index 9bfcbc7..ef7bb83 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -321,7 +321,7 @@ void BinaryBroadcastComputeWithBool(const nnvm::NodeAttrs& attrs,
     if (req[0] == kNullOp)
       return;
     mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
-    MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[0].type_flag_, DType, {
+    MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(outputs[0].type_flag_, DType, {
       BROADCAST_NDIM_SWITCH(ndim, NDim, {
         mshadow::Shape<NDim> oshape  = new_oshape.get<NDim>();
         mshadow::Shape<NDim> lstride = 
mxnet_op::calc_stride(new_lshape.get<NDim>());
diff --git a/src/operator/tensor/elemwise_binary_op.h b/src/operator/tensor/elemwise_binary_op.h
index aa350b8..8339f20 100644
--- a/src/operator/tensor/elemwise_binary_op.h
+++ b/src/operator/tensor/elemwise_binary_op.h
@@ -560,7 +560,7 @@ class ElemwiseBinaryOp : public OpBase {
     CHECK_EQ(inputs.size(), 2U);
     CHECK_EQ(outputs.size(), 1U);
     MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-      MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[0].type_flag_, DType, {
+      MSHADOW_TYPE_SWITCH_EXT_WITH_BOOL(outputs[0].type_flag_, DType, {
        const size_t size = (minthree(outputs[0].Size(), inputs[0].Size(), inputs[1].Size()) +
                              DataType<DType>::kLanes - 1) /
                             DataType<DType>::kLanes;
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 09deace..c8edad6 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -1586,6 +1586,17 @@ def _add_workload_fmod(array_pool):
     OpArgMngr.add_workload('fmod', array_pool['4x1'], array_pool['1x1x0'])
 
 
+def _add_workload_floor_divide(array_pool):
+    OpArgMngr.add_workload('floor_divide', array_pool['4x1'], array_pool['1x2'])
+    OpArgMngr.add_workload('floor_divide', array_pool['4x1'], 2)
+    OpArgMngr.add_workload('floor_divide', 2, array_pool['4x1'])
+    OpArgMngr.add_workload('floor_divide', array_pool['4x1'], array_pool['1x1x0'])
+    OpArgMngr.add_workload('floor_divide', np.array([-1, -2, -3], np.float32), 1.9999)
+    OpArgMngr.add_workload('floor_divide', np.array([1000, -200, -3], np.int64), 3)
+    OpArgMngr.add_workload('floor_divide', np.array([1, -2, -3, 4, -5], np.int32), 2.0001)
+    OpArgMngr.add_workload('floor_divide', np.array([1, -50, -0.2, 40000, 0], np.float64), -7)
+
+
 def _add_workload_remainder():
     # test remainder basic
    OpArgMngr.add_workload('remainder', np.array([0, 1, 2, 4, 2], dtype=np.float16),
@@ -3095,6 +3106,7 @@ def _prepare_workloads():
     _add_workload_power(array_pool)
     _add_workload_mod(array_pool)
     _add_workload_fmod(array_pool)
+    _add_workload_floor_divide(array_pool)
     _add_workload_remainder()
     _add_workload_maximum(array_pool)
     _add_workload_fmax(array_pool)
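
The workloads above are replayed against official NumPy by the
interoperability harness; the same agreement can be spot-checked by hand
(illustrative sketch, same build assumption as above):

    import numpy as onp
    from mxnet import np, npx
    npx.set_np()

    data = onp.array([1, -50, -0.2, 40000, 0], dtype=onp.float64)
    expected = onp.floor_divide(data, -7)
    actual = np.floor_divide(np.array(data), -7)
    assert onp.allclose(actual.asnumpy(), expected)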
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index b7d2c86..bf32c69 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -3070,6 +3070,8 @@ def test_np_binary_funcs():
                                [lambda y, x1, x2: onp.broadcast_to(x1, y.shape)]),
         'divide': (0.1, 1.0, [lambda y, x1, x2: onp.ones(y.shape) / x2],
                    [lambda y, x1, x2: -x1 / (x2 * x2)]),
+        'floor_divide': (0.1, 1.0, [lambda y, x1, x2: onp.zeros(y.shape)],
+                 [lambda y, x1, x2: onp.zeros(y.shape)]),
         'mod': (1.0, 10.0,
                 [lambda y, x1, x2: onp.ones(y.shape),
                  lambda y, x1, x2: onp.zeros(y.shape)],
