sxjscience commented on a change in pull request #18319:
URL: https://github.com/apache/incubator-mxnet/pull/18319#discussion_r437876054



##########
File path: src/operator/numpy/np_indexing_op.cc
##########
@@ -0,0 +1,544 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file np_indexing_op.cc
+*/
+
+#include "./np_indexing_op.h"
+
+namespace mxnet {
+namespace op {
+
+struct AdvancedIndexingTakeCPU {
+  // assume that idx has been flattened to a 1-D tensor (N,)
+  // assume that out_data and in_data have been flattened to 2-D tensors, (N, M) and (K, M)
+  // M is the number of columns of in_data and out_data
+  // K is the number of rows of in_data
+  // i is the index of out_data
+  template<typename DType, typename IType>
+  MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data,
+                                  const IType* idx, const size_t M, const int64_t K) {
+    int64_t j = static_cast<int64_t>(idx[i]);
+    j = j % K;
+    j += (j < 0) ? K : 0;
+#pragma GCC diagnostic push
+#if __GNUC__ >= 8
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+    std::memcpy(out_data + i * M, in_data + j * M, M * sizeof(DType));
+#pragma GCC diagnostic pop
+  }
+};
+
+struct AdvancedIndexingTakeMultiDimensionCPU {
+  // assume that idx has been flattened to a 1-D tensor (N,)
+  // assume that out_data and in_data have been flattened to 2-D tensors, (N, M) and (K, M)
+  // M is the number of columns of in_data and out_data
+  // K is the number of rows of in_data
+  // i is the index of out_data
+  template<typename DType, typename IType>
+  MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data,
+                                  const IType* idx, const size_t M, const int64_t K) {
+    int64_t j = static_cast<int64_t>(idx[i]);
+    j = j % K;
+    j += (j < 0) ? K : 0;
+#pragma GCC diagnostic push
+#if __GNUC__ >= 8
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+    std::memcpy(out_data + i * M, in_data + (i * K + j) * M, M * sizeof(DType));
+#pragma GCC diagnostic pop
+  }
+};
+
+struct AdvancedIndexingBooleanMaskBackwardCPUWriteKernel {
+  template<typename DType>
+  static void Map(int i,
+                  DType* igrad,
+                  const OpReqType /*req*/,
+                  const DType* ograd,
+                  const int32_t* idx,
+                  const size_t col_size) {
+    // i is row id already
+    int32_t prev = (i == 0) ? 0 : idx[i - 1];
+    int32_t curr = idx[i];
+#pragma GCC diagnostic push
+#if __GNUC__ >= 8
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+    if (prev != curr) {
+      std::memcpy(igrad + i * col_size, ograd + prev * col_size, col_size * sizeof(DType));
+    } else {
+      std::memset(igrad + i * col_size, 0, col_size * sizeof(DType));
+    }
+#pragma GCC diagnostic pop
+  }
+};
+
+template<typename DType>
+bool CheckIndexOutOfBound(const DType* data_ptr, size_t data_size,
+                          const DType min, const DType max) {
+  bool is_valid = true;
+  for (size_t i = 0; i < data_size; i++) {
+    if (data_ptr[i] > max || data_ptr[i] < min) {
+      is_valid = false;
+      break;
+    }
+  }
+  return is_valid;
+}
+
+inline bool AdvancedIndexingOpType(const nnvm::NodeAttrs& attrs,
+                       std::vector<int> *in_attrs,
+                       std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 2U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  CHECK_NE((*in_attrs)[1], -1) << "Index type must be set for take operator";
+
+  TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
+  TYPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]);
+  return (*in_attrs)[0] != -1;
+}
+
+bool AdvancedIndexingOpStorageType(const nnvm::NodeAttrs& attrs,
+                            const int dev_mask,
+                            DispatchMode* dispatch_mode,
+                            std::vector<int> *in_attrs,
+                            std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 2);
+  CHECK_EQ(out_attrs->size(), 1);
+  for (int &attr : *in_attrs) {
+    CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported";
+  }
+  for (int &attr : *out_attrs) {
+    attr = kDefaultStorage;
+  }
+  *dispatch_mode = DispatchMode::kFComputeEx;
+  return true;
+}
+
+bool AdvancedIndexingOpBackStorageType(const nnvm::NodeAttrs& attrs,
+                                const int dev_mask,
+                                DispatchMode* dispatch_mode,
+                                std::vector<int> *in_attrs,
+                                std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 3);
+  CHECK_EQ(out_attrs->size(), 2);
+  for (int &attr : *in_attrs) {
+    CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported";
+  }
+  for (int &attr : *out_attrs) {
+    attr = kDefaultStorage;
+  }
+  *dispatch_mode = DispatchMode::kFComputeEx;
+  return true;
+}
+
+template<>
+void AdvancedIndexingOpForward<cpu>(const nnvm::NodeAttrs& attrs,
+                        const OpContext& ctx,
+                        const std::vector<NDArray>& inputs,
+                        const std::vector<OpReqType>& req,
+                        const std::vector<NDArray>& outputs) {
+  using namespace mxnet_op;
+  if (req[np_indexing_::kOut] == kNullOp) return;
+  CHECK_EQ(inputs.size(), 2U);
+  CHECK_EQ(outputs.size(), 1U);
+
+  if (inputs[np_indexing_::kIdx].dtype() == mshadow::kBool) {
+    CHECK(req[0] == kWriteTo || req[0] == kWriteInplace);
+    const NDArray &data = inputs[0];
+    const NDArray &idx = inputs[1];
+    const NDArray &out = outputs[0];
+    CHECK_EQ(data.shape()[0], idx.shape()[0]);
+    CHECK_EQ(idx.shape().ndim(), 1U);  // idx is required to be 1-d.
+    // count the number of 1s in `idx`, so that we could know the output dimension
+    size_t idx_size = idx.shape()[0];
+    std::vector<int32_t> prefix_sum(idx_size, 0);
+    size_t valid_num = 0;
+    // Calculate prefix sum
+    bool* idx_dptr = idx.data().dptr<bool>();
+    for (size_t i = 0; i < idx_size; i++) {
+      prefix_sum[i] = (i == 0) ? 0 : prefix_sum[i - 1];
+      prefix_sum[i] += (idx_dptr[i]) ? 1 : 0;
+    }
+    valid_num = prefix_sum[idx_size - 1];
+    // set the output shape forcefully
+    mxnet::TShape s = data.shape();
+    s[0] = valid_num;
+
+    const_cast<NDArray &>(out).Init(s);
+    // do the copy
+    MSHADOW_TYPE_SWITCH_WITH_BOOL(data.dtype(), DType, {
+      size_t input_size = data.shape().Size();
+      size_t col_size = input_size / idx_size;
+      mshadow::Stream<cpu> *stream = ctx.get_stream<cpu>();
+      mxnet_op::Kernel<BooleanMaskForwardCPUKernel, cpu>::Launch(
+        stream, idx_size, out.data().dptr<DType>(), data.data().dptr<DType>(),
+        prefix_sum.data(), col_size);
+    });
+  } else if (inputs[np_indexing_::kIdx].dtype() == mshadow::kInt8 ||
+             inputs[np_indexing_::kIdx].dtype() == mshadow::kInt16 ||
+             inputs[np_indexing_::kIdx].dtype() == mshadow::kInt32 ||
+             inputs[np_indexing_::kIdx].dtype() == mshadow::kInt64) {
+    using namespace mshadow;
+    const mxnet::TShape& idxshape = inputs[np_indexing_::kIdx].shape();
+    const mxnet::TShape& arrshape = inputs[np_indexing_::kArr].shape();
+
+    if (idxshape.Size() == 0) {
+      return;
+    }
+
+    mxnet::TShape oshape(idxshape.ndim() + arrshape.ndim() - 1, -1);
+    for (index_t i = 0; i < idxshape.ndim(); ++i) {
+      oshape[i] = idxshape[i];
+    }
+    for (index_t i = 0; i < arrshape.ndim(); i++) {
+      if (i < 0) {
+        oshape[i] = arrshape[i];
+      } else if (i > 0) {
+        oshape[i + idxshape.ndim() - 1] = arrshape[i];
+      }
+    }
+
+    const NDArray &out = outputs[0];
+    const_cast<NDArray &>(out).Init(oshape);
+
+    Stream<cpu> *s = ctx.get_stream<cpu>();
+
+    MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[np_indexing_::kOut].dtype(), DType, {  // output data type
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[np_indexing_::kIdx].dtype(), IType, {  // index data type
+        IType min = 0;
+        IType max = static_cast<IType>(arrshape[0] - 1);
+        // checking with a single thread is faster since the index data is small
+        IType* idx_ptr = inputs[np_indexing_::kIdx].data().dptr<IType>();
+        size_t idx_size = idxshape.Size();
+        bool is_valid = CheckIndexOutOfBound(idx_ptr, idx_size, min, max);
+        CHECK(is_valid) << "take operator contains indices out of bound";
+        Kernel<AdvancedIndexingTakeCPU, cpu>::Launch(s, idxshape.Size(),
+            outputs[np_indexing_::kOut].data().dptr<DType>(),
+            inputs[np_indexing_::kArr].data().dptr<DType>(),
+            inputs[np_indexing_::kIdx].data().dptr<IType>(),
+            oshape.Size()/idxshape.Size(), arrshape[0]);
+      });
+    });
+  } else {
+    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()
+    << "arrays used as indices must be explictly declared as integer (or 
boolean) type. "
+    << "Use np.astype() to cast indices to integer or boolean.";
+  }
+}
+
+template<>
+void AdvancedIndexingMultipleOpForward<cpu>(const nnvm::NodeAttrs& attrs,
+                        const OpContext& ctx,
+                        const std::vector<NDArray>& inputs,
+                        const std::vector<OpReqType>& req,
+                        const std::vector<NDArray>& outputs) {
+  using namespace mxnet_op;
+  if (req[np_indexing_::kOut] == kNullOp) return;
+  CHECK_EQ(inputs.size(), 2U);
+  CHECK_EQ(outputs.size(), 1U);
+
+  if (inputs[np_indexing_::kIdx].dtype() == mshadow::kBool) {
+    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()
+    << "Multi-dimension boolean indexing is not supported.";
+  } else if (inputs[np_indexing_::kIdx].dtype() == mshadow::kInt8 ||
+             inputs[np_indexing_::kIdx].dtype() == mshadow::kInt16 ||
+             inputs[np_indexing_::kIdx].dtype() == mshadow::kInt32 ||
+             inputs[np_indexing_::kIdx].dtype() == mshadow::kInt64) {
+    using namespace mshadow;
+    const mxnet::TShape& idxshape = inputs[np_indexing_::kIdx].shape();
+    const mxnet::TShape& arrshape = inputs[np_indexing_::kArr].shape();
+
+    if (idxshape.Size() == 0 || idxshape.Size() == 1) {
+      return;
+    }
+
+    CHECK_EQ(arrshape[0], idxshape[0]);  // size of index must equal size of array
+
+    mxnet::TShape oshape(arrshape.ndim() - 1, -1);
+    oshape[0] = arrshape[0];
+    for (index_t i = 2; i < arrshape.ndim(); i++) {
+      oshape[i-1] = arrshape[i];
+    }
+
+    const NDArray &out = outputs[0];
+    const_cast<NDArray &>(out).Init(oshape);
+
+    Stream<cpu> *s = ctx.get_stream<cpu>();
+
+    MSHADOW_TYPE_SWITCH_WITH_BOOL(outputs[np_indexing_::kOut].dtype(), DType, {  // output data type
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[np_indexing_::kIdx].dtype(), IType, {  // index data type
+        IType min = 0;
+        IType max = static_cast<IType>(arrshape[1] - 1);
+        // checking with a single thread is faster since the index data is small
+        IType* idx_ptr = inputs[np_indexing_::kIdx].data().dptr<IType>();
+        size_t idx_size = idxshape.Size();
+        bool is_valid = CheckIndexOutOfBound(idx_ptr, idx_size, min, max);
+        CHECK(is_valid) << "take operator contains indices out of bound";
+        Kernel<AdvancedIndexingTakeMultiDimensionCPU, cpu>::Launch(s, idxshape.Size(),
+            outputs[np_indexing_::kOut].data().dptr<DType>(),
+            inputs[np_indexing_::kArr].data().dptr<DType>(),
+            inputs[np_indexing_::kIdx].data().dptr<IType>(),
+            oshape.Size()/idxshape.Size(), arrshape[1]);
+      });
+    });
+  } else {
+    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()
+    << "arrays used as indices must be explictly declared as integer (or 
boolean) type. "
+    << "Use np.astype() to cast indices to integer or boolean.";
+  }
+}
+
+template<>
+void AdvancedIndexingOpBackward<cpu>(const nnvm::NodeAttrs& attrs,
+                                     const OpContext &ctx,
+                                     const std::vector<NDArray> &inputs,
+                                     const std::vector<OpReqType> &req,
+                                     const std::vector<NDArray> &outputs) {
+  CHECK_EQ(inputs.size(), 3U);
+  CHECK_EQ(outputs.size(), 2U);
+  if (req[0] == kNullOp) return;
+
+  if (inputs[np_indexing_::kIdx+1].dtype() == mshadow::kBool) {
+    // inputs: {ograd, data, idx}
+    // outputs: {igrad_data, igrad_idx}
+    const NDArray& ograd = inputs[0];
+    const NDArray& idx = inputs[2];
+    const NDArray& igrad_data = outputs[0];
+    MSHADOW_TYPE_SWITCH(igrad_data.dtype(), DType, {
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(idx.dtype(), IType, {
+        size_t input_size = igrad_data.shape().Size();
+        size_t idx_size = idx.shape()[0];
+        size_t col_size = input_size / idx_size;
+        std::vector<int32_t> prefix_sum(idx_size, 0);
+        bool* idx_dptr = idx.data().dptr<bool>();
+        for (size_t i = 0; i < idx_size; i++) {
+          prefix_sum[i] = (i == 0) ? 0 : prefix_sum[i - 1];
+          prefix_sum[i] += (idx_dptr[i]) ? 1 : 0;
+        }
+        mshadow::Stream<cpu> *stream = ctx.get_stream<cpu>();
+        if (req[0] == kAddTo) {
+          mxnet_op::Kernel<BooleanMaskBackwardKernel, cpu>::Launch(
+            stream, idx_size, igrad_data.data().dptr<DType>(), req[0],
+            ograd.data().dptr<DType>(), prefix_sum.data(), col_size);
+        } else {
+          mxnet_op::Kernel<AdvancedIndexingBooleanMaskBackwardCPUWriteKernel, cpu>::Launch(
+            stream, idx_size, igrad_data.data().dptr<DType>(), req[0],
+            ograd.data().dptr<DType>(), prefix_sum.data(), col_size);
+        }
+      });
+    });
+  } else if (inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt8 ||
+             inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt16 ||
+             inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt32 ||
+             inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt64) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_NE(req[np_indexing_::kIdx], kAddTo)
+      << "take layer doesn't support gradient of req type kAddTo to index";
+
+    // grad_out is the gradient of the outputs in the feed-forward
+    // grad_in is the gradient of the inputs in the feed-forward
+    Stream<cpu> *s = ctx.get_stream<cpu>();
+
+    MSHADOW_TYPE_SWITCH(outputs[0].dtype(), DType, {  // output data type
+      MSHADOW_TYPE_SWITCH(inputs[2].dtype(), IType, {  // index data type
+        // inputs are specified in the .cc file, which are the gradients from
+        // the upper layer and the input index
+        // outputs are the gradients of inputs in the feed-forward pass
+        const mxnet::TShape& idxshape = inputs[2].shape();
+        const mxnet::TShape& arrshape = outputs[0].shape();
+        const mxnet::TShape& oshape = inputs[0].shape();
+
+        if (idxshape.Size() == 0) {
+          return;
+        }
+
+        if (req[np_indexing_::kIdx] != kNullOp) {
+          mxnet_op::Kernel<mxnet_op::set_zero, cpu>::Launch(
+            s, idxshape.Size(), outputs[np_indexing_::kIdx].data().dptr<IType>());
+        }
+
+        int idxndim = idxshape.ndim();
+        Tensor<cpu, 1, IType> idx = inputs[2].data().get_with_shape<cpu, 1, IType>(
+            Shape1(idxshape.ProdShape(0, idxndim)), s);
+        Tensor<cpu, 2, DType> grad_out = inputs[0].data().get_with_shape<cpu, 2, DType>(
+            Shape2(oshape.ProdShape(0, idxndim), oshape.ProdShape(idxndim, oshape.ndim())), s);
+        Tensor<cpu, 2, DType> grad_in = outputs[0].data().get_with_shape<cpu, 2, DType>(
+            Shape2(arrshape[0], arrshape.ProdShape(1, arrshape.ndim())), s);
+
+        // re-using the previous code for axis = 0 case
+        if (req[np_indexing_::kArr] == kWriteTo || req[np_indexing_::kArr] == kAddTo) {
+          if (req[np_indexing_::kArr] == kWriteTo) {
+            grad_in = scalar<DType>(0.0f);
+          }
+          AddTakeGrad<false>(grad_in, idx, grad_out);
+        } else {
+          LOG(FATAL) << "wrong req";
+        }
+      });
+    });
+  } else {
+    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()
+    << "arrays used as indices must be explictly declared as integer (or 
boolean) type. "
+    << "Use np.astype() to cast indices to integer or boolean.";
+  }
+}
+
+template<>
+void AdvancedIndexingMultipleOpBackward<cpu>(const nnvm::NodeAttrs& attrs,
+                                     const OpContext &ctx,
+                                     const std::vector<NDArray> &inputs,
+                                     const std::vector<OpReqType> &req,
+                                     const std::vector<NDArray> &outputs) {
+  CHECK_EQ(inputs.size(), 3U);
+  CHECK_EQ(outputs.size(), 2U);
+  if (req[0] == kNullOp) return;
+
+  if (inputs[np_indexing_::kIdx+1].dtype() == mshadow::kBool) {
+    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()
+    << "Multi-dimension boolean indexing is not supported.";
+  } else if (inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt8 ||
+             inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt16 ||
+             inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt32 ||
+             inputs[np_indexing_::kIdx+1].dtype() == mshadow::kInt64) {
+    using namespace mxnet_op;
+    using namespace mshadow;
+    if (req[0] == kNullOp) return;
+    mshadow::Stream<cpu> *s = ctx.get_stream<cpu>();
+
+    const mxnet::TShape& ishape = outputs[0].shape();
+    const index_t axis = 0;
+    int leading = 1, trailing = 1, M = ishape[0];
+    for (index_t i = 0; i < axis; ++i) leading *= ishape[i];
+    for (index_t i = axis+1; i < ishape.ndim(); ++i) trailing *= ishape[i];
+
+    MSHADOW_TYPE_SWITCH(outputs[0].data().type_flag_, DType, {  // output type
+      MSHADOW_TYPE_SWITCH(inputs[2].dtype(), IType, {  // index type
+        if (req[0] != kAddTo) outputs[0].data().FlatTo1D<cpu, DType>(s) = 0;
+        if (trailing == 1) {
+          Kernel<pick_grad<2, true>, cpu>::Launch(s, inputs[0].data().Size(),
+          outputs[0].data().dptr<DType>(), inputs[0].data().dptr<DType>(),
+          inputs[2].data().dptr<IType>(), M,
+          1, Shape2(leading, M), Shape2(leading, 1));
+        } else {
+          Kernel<pick_grad<3, true>, cpu>::Launch(s, inputs[0].data().Size(),
+          outputs[0].data().dptr<DType>(), inputs[0].data().dptr<DType>(),
+          inputs[2].data().dptr<IType>(), M,
+          trailing, Shape3(leading, M, trailing),
+          Shape3(leading, 1, trailing));
+        }
+      });
+    });
+  } else {
+    dmlc::LogMessageFatal(__FILE__, __LINE__).stream()

Review comment:
       Is it possible to directly use `LOG(FATAL)`?
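       For reference, a minimal sketch of what that could look like (assuming dmlc-core's `LOG(FATAL)` macro, which expands to the same `dmlc::LogMessageFatal(__FILE__, __LINE__).stream()` already used via `LOG(FATAL) << "wrong req";` elsewhere in this file):

       ```cpp
       LOG(FATAL) << "arrays used as indices must be explicitly declared as integer (or boolean) type. "
                  << "Use np.astype() to cast indices to integer or boolean.";
       ```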




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

