rongzha1 commented on a change in pull request #12997: A better take forward 
kernel for CPU
URL: https://github.com/apache/incubator-mxnet/pull/12997#discussion_r229213828
 
 

 ##########
 File path: src/operator/tensor/indexing_op.cc
 ##########
 @@ -28,6 +28,28 @@
 namespace mxnet {
 namespace op {
 
+template<bool clip = true>
+struct TakeCPU {
+  // assume that idx have been flattened to a 1-D tensor (N,)
+  // assume that out_data and in_data have been flattened to 2-D tensors, (N, 
M) and (K, M)
+  // M is the number of columns of in_data and out_data
+  // K is the number of rows of in_data
+  // i is the index of out_data
+  template<typename DType, typename IType>
+  MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data,
+                                  const IType* idx, const size_t M, const 
int64_t K) {
+    int64_t j = static_cast<int64_t>(idx[i]);
+    if (clip) {
+      if (j <= 0) j = 0;
+      else if (j >= K) j = K - 1;
+    } else {
+      j = j % K;
+      j += (j < 0) ? K : 0;
+    }
+    std::memcpy(out_data + i * M, in_data + j * M, M * sizeof(DType));
 
 Review comment:
   Could you run some tests to find the best value for some_size:
   if( M > some_size) {
     std::memcpy(out_data + i * M, in_data + j * M, M * sizeof(DType))
   } else {
         const int jM = j*M;
         const int iM = i*M;
         for (int n = 0; n < M; n++) {
           out_data[iM + n] = in_data[jM + n];
         }
   }
   
   For small M, the memcpy call will have a relatively large overhead.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to