jiajinyu commented on a change in pull request #12958: Improve dot(csr, rsp) on 
CPU by 10x 
URL: https://github.com/apache/incubator-mxnet/pull/12958#discussion_r230274359
 
 

 ##########
 File path: src/operator/tensor/dot-inl.h
 ##########
 @@ -521,18 +521,22 @@ struct DotCsrRspDnsByRowBlocks {
       const RType* row_idx_ptr = first;
       // end of binary search
       if (row_idx_ptr == row_idx_r+nnr_r || *row_idx_ptr > 
col_idx_l[indptr_l[j+1]-1]) continue;
-      for (IType k = indptr_l[j]; k < indptr_l[j+1] && row_idx_ptr != 
row_idx_r+nnr_r;) {
-        if (col_idx_l[k] == *row_idx_ptr) {
-          const dim_t offset_r = (row_idx_ptr - row_idx_r) * num_cols;
-          for (dim_t l = 0; l < num_cols; ++l) {
-            out[offset_out+l] += data_l[k] * data_r[offset_r+l];
-          }
-          ++k;
-          ++row_idx_ptr;
-        } else if (col_idx_l[k] < *row_idx_ptr) {
-          ++k;
-        } else {
-          ++row_idx_ptr;
+      const auto end = row_idx_r + nnr_r;
+      auto start = row_idx_ptr;
+      for (IType k = indptr_l[j]; k < indptr_l[j+1] && start < end; ++k) {
+        const auto v = col_idx_l[k];
+        if (v < *start) {
+          continue;
+        }
+        const auto p = std::lower_bound(start, end, v);
+        start = p;
+        if (p >= end || v < *p) {
+          continue;
+        }
+        start += 1;
+        const dim_t offset_r = (p - row_idx_r) * num_cols;
+        for (dim_t l = 0; l < num_cols; ++l) {
+          out[offset_out+l] += data_l[k] * data_r[offset_r+l];
 
 Review comment:
   Thanks a lot for the great feedback. Do you have any suggestions on how to run the 
benchmark? I think the results depend heavily on the hardware spec.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to