haojin2 commented on a change in pull request #14359: Speedup
_contrib_index_copy
URL: https://github.com/apache/incubator-mxnet/pull/14359#discussion_r264367160
##########
File path: src/operator/contrib/index_copy.cc
##########
@@ -26,6 +26,99 @@
namespace mxnet {
namespace op {
// CPU kernel for the index_copy forward pass.
// Iteration i copies the i-th row of the update tensor into row idx[i]
// of the output tensor; rows not named in idx are left untouched
// (the caller pre-fills the output with the original tensor).
struct index_copy_fwd_cpu {
  /*!
   * \brief Copy one row of `new_tensor` into the output at the indexed position.
   * \param i          iteration index, one per entry of `idx`
   * \param new_tensor source rows to be written (idx_size x dim_size)
   * \param idx        destination row index for each source row
   * \param out_tensor output buffer, already holding the original data
   * \param dim_size   number of elements in one row
   */
  template<typename DType, typename IType>
  static void Map(int i,
                  const DType* new_tensor,
                  const IType* idx,
                  DType* out_tensor,
                  int dim_size) {
    const int dst_row = static_cast<int>(idx[i]);
    std::memcpy(out_tensor + dst_row * dim_size,
                new_tensor + i * dim_size,
                sizeof(DType) * dim_size);
  }
};
+
+template<>
+void IndexCopyForward<cpu>(const nnvm::NodeAttrs& attrs,
+ const OpContext& ctx,
+ const std::vector<TBlob>& inputs,
+ const std::vector<OpReqType>& req,
+ const std::vector<TBlob>& outputs) {
+ using namespace mshadow;
+ using namespace mxnet_op;
+ CHECK_EQ(inputs.size(), 3U);
+ CHECK_EQ(outputs.size(), 1U);
+ CHECK_EQ(req.size(), 1U);
+ mshadow::Stream<cpu> *s = ctx.get_stream<cpu>();
+ const TBlob& out = outputs[0];
+ const TBlob& original_tensor = inputs[0];
+ const TBlob& idx_vector = inputs[1];
+ const TBlob& copied_tensor = inputs[2];
+ int dim_size = inputs[2].Size() / inputs[1].Size();
+ // copy original tensor to output
+ copy(s, out, original_tensor);
+ // index copy
+ MSHADOW_TYPE_SWITCH(out.type_flag_, DType, {
+ MSHADOW_TYPE_SWITCH(idx_vector.type_flag_, IType, {
+ Kernel<index_copy_fwd_cpu, cpu>::Launch(
+ s, idx_vector.Size(), copied_tensor.dptr<DType>(),
+ idx_vector.dptr<IType>(), out.dptr<DType>(), dim_size);
+ });
+ });
+}
+
// CPU kernel for the index_copy backward pass, launched once per row of
// the original tensor's gradient.
//
// BUG FIX: the previous version had iteration i zero row idx[i] of
// orig_tensor_grad — a row owned by a *different* iteration, which also
// unconditionally memcpy'd out_grad into it. The final contents therefore
// depended on iteration order (sequentially, iteration j == idx[i] > i
// re-filled the zeroed row) and raced under a parallel Kernel::Launch.
// Now each iteration writes only its own row i of orig_tensor_grad and
// its own row i of new_tensor_grad, making the result order-independent
// and race-free.
struct index_copy_bwd_cpu {
  /*!
   * \brief Compute one row of both input gradients.
   * \param i                iteration index over rows of the original tensor
   * \param out_tensor_grad  incoming gradient w.r.t. the output
   * \param orig_tensor_grad gradient w.r.t. the original tensor (written here)
   * \param new_tensor_grad  gradient w.r.t. the copied-in tensor (written here)
   * \param idx              index vector used in the forward pass
   * \param dim_size         number of elements in one row
   * \param idx_size         number of entries in `idx`
   */
  template<typename DType, typename IType>
  static void Map(int i,
                  const DType* out_tensor_grad,
                  DType* orig_tensor_grad,
                  DType* new_tensor_grad,
                  const IType* idx,
                  int dim_size,
                  int idx_size) {
    // Gradient w.r.t. the new tensor: row i was forwarded to row idx[i]
    // of the output, so gather that row's gradient back.
    if (i < idx_size) {
      std::memcpy(new_tensor_grad + i * dim_size,
                  out_tensor_grad + static_cast<int>(idx[i]) * dim_size,
                  sizeof(DType) * dim_size);
    }
    // Gradient w.r.t. the original tensor: row i passes through unless it
    // was overwritten in the forward pass, in which case it gets zero.
    bool overwritten = false;
    for (int j = 0; j < idx_size; ++j) {
      if (static_cast<int>(idx[j]) == i) {
        overwritten = true;
        break;
      }
    }
    DType* orig_ptr = orig_tensor_grad + i * dim_size;
    if (overwritten) {
      std::memset(orig_ptr, 0, sizeof(DType) * dim_size);
    } else {
      std::memcpy(orig_ptr, out_tensor_grad + i * dim_size,
                  sizeof(DType) * dim_size);
    }
  }
};
+
+template<>
+void IndexCopyBackward<cpu>(const nnvm::NodeAttrs& attrs,
+ const OpContext& ctx,
+ const std::vector<TBlob>& inputs,
+ const std::vector<OpReqType>& req,
+ const std::vector<TBlob>& outputs) {
+ using namespace mshadow;
+ using namespace mxnet_op;
+ CHECK_EQ(inputs.size(), 4U);
+ CHECK_EQ(outputs.size(), 3U);
+ Stream<cpu> *s = ctx.get_stream<cpu>();
+ const TBlob& out_grad = inputs[0];
+ const TBlob& index = inputs[2];
+ const TBlob& in_grad_1 = outputs[0];
Review comment:
indices should have no gradients.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services