This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 64287dd  Speed up SequenceReverse (#14627)
64287dd is described below

commit 64287dd23d880c6dd664ed7ab1460ee26c902fcf
Author: Przemyslaw Tredak <[email protected]>
AuthorDate: Mon Apr 29 18:49:53 2019 +0200

    Speed up SequenceReverse (#14627)
---
 src/operator/sequence_reverse-inl.h | 57 ++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/src/operator/sequence_reverse-inl.h 
b/src/operator/sequence_reverse-inl.h
index 03210d3..8e2362f 100644
--- a/src/operator/sequence_reverse-inl.h
+++ b/src/operator/sequence_reverse-inl.h
@@ -64,40 +64,37 @@ struct SequenceReverseParam : public 
dmlc::Parameter<SequenceReverseParam> {
   }
 };
 
+template <OpReqType req>
 struct ReverseKernel {
   template <typename DType, typename IType>
   MSHADOW_XINLINE static void Map(const int i, DType *const out_data,
                                   const DType *const in_data,
-                                  const OpReqType req,
                                   const index_t max_seq_len,
                                   const index_t batch_size,
                                   const index_t other_dim, const index_t numel,
                                   const IType *const indices) {
-    for (index_t batch = 0; batch < batch_size; ++batch) {
-      const index_t num_seq =
-          indices ? static_cast<index_t>(indices[batch]) : max_seq_len;
-      const index_t padded_periods = max_seq_len - num_seq;
-      // padded part
-      if (padded_periods > 0 && i < static_cast<int>(padded_periods)) {
-        const int padded_in_offset =
-            (i + num_seq) * batch_size * other_dim + batch * other_dim;
-
-        for (index_t j = 0; j < other_dim; ++j) {
-          KERNEL_ASSIGN(out_data[padded_in_offset + j], req,
-                        in_data[padded_in_offset + j]);
-        }
-      }
-      // unpadded part
-      if (i < static_cast<int>(num_seq)) {
-        const int in_offset = i * batch_size * other_dim + batch * other_dim;
-        const int out_offset =
-            numel - (i + 1 + padded_periods) * batch_size * other_dim +
-            batch * other_dim;
-
-        for (index_t j = 0; j < other_dim; ++j) {
-          KERNEL_ASSIGN(out_data[out_offset + j], req, in_data[in_offset + j]);
-        }
-      }
+    const index_t batch = i / (max_seq_len * other_dim);
+    const int id = (i / other_dim) % max_seq_len;
+    const index_t j = i % other_dim;
+    const index_t num_seq =
+        indices ? static_cast<index_t>(indices[batch]) : max_seq_len;
+    const index_t padded_periods = max_seq_len - num_seq;
+    // padded part
+    if (padded_periods > 0 && id < static_cast<int>(padded_periods)) {
+      const int padded_in_offset =
+          (id + num_seq) * batch_size * other_dim + batch * other_dim;
+
+      KERNEL_ASSIGN(out_data[padded_in_offset + j], req,
+                    in_data[padded_in_offset + j]);
+    }
+    // unpadded part
+    if (id < static_cast<int>(num_seq)) {
+      const int in_offset = id * batch_size * other_dim + batch * other_dim;
+      const int out_offset =
+          numel - (id + 1 + padded_periods) * batch_size * other_dim +
+          batch * other_dim;
+
+      KERNEL_ASSIGN(out_data[out_offset + j], req, in_data[in_offset + j]);
     }
   }
 };
@@ -118,9 +115,11 @@ class SequenceReverseOp : public Operator {
     const index_t other_dim = data.size(2);
     const index_t tensor_numel = data.shape_.Size();
 
-    mxnet_op::Kernel<ReverseKernel, xpu>::Launch(
-        s, max_seq_len, out.dptr_, data.dptr_, req, max_seq_len, batch_size,
-        other_dim, tensor_numel, indices);
+    MXNET_ASSIGN_REQ_SWITCH(req, req_type, {
+      mxnet_op::Kernel<ReverseKernel<req_type>, xpu>::Launch(
+          s, max_seq_len * batch_size * other_dim, out.dptr_, data.dptr_,
+          max_seq_len, batch_size, other_dim, tensor_numel, indices);
+    });
   }
 
   virtual void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data,

Reply via email to