eric-haibin-lin commented on a change in pull request #11113: Support for
dot(dns, csr) = dns and dot(dns, csr.T) = dns on CPU
URL: https://github.com/apache/incubator-mxnet/pull/11113#discussion_r192551482
##########
File path: src/operator/tensor/dot-inl.h
##########
@@ -655,7 +659,81 @@ struct DotDnsCsrCsrByRowBlocks {
}
};
+/*!
+ * \brief CPU Kernel of dot(dns1, csr) = dns2
+ * Parallelization by row blocks
+ *
+ * Each invocation of Map handles one contiguous block of seg_len rows of
+ * dns1 and accumulates into the rows of dns2 that carry the same row indices.
+ */
+struct DotDnsCsrDnsByRowBlocks {
+ /*!
+ * \brief Accumulate the product of one row block of dns1 with the csr matrix
+ * into out. Rows [i * seg_len, min((i + 1) * seg_len, num_rows_l)) are
+ * processed; the call returns immediately if the block starts at or
+ * beyond num_rows_l.
+ * Results are added to out with +=, so out presumably has to be
+ * initialized (e.g. zero-filled) by the caller - TODO confirm at call site.
+ * \param i the i-th thread, i.e. the index of the row block to process
+ * \param out output buffer of dns2, written as out[r * num_cols_r + col_idx]
+ * \param data_l values of dns1, read as data_l[r * num_cols_l + j]
+ * \param data_r values of the csr matrix, read at the same position k as col_idx_r
+ * \param indptr_r row pointer of the csr matrix; row j owns the positions
+ * [indptr_r[j], indptr_r[j+1]), so entry num_rows_r is also read
+ * \param col_idx_r column index of each stored csr value
+ * \param seg_len number of rows of dns1 handled by one invocation
+ * \param num_rows_l number of rows of dns1; upper bound for the row index r
+ * \param num_cols_l distance between consecutive rows in data_l
+ * \param num_rows_r number of csr rows that are iterated over
+ * \param num_cols_r distance between consecutive rows in out
+ */
+ template<typename DType, typename IType, typename CType>
+ MSHADOW_CINLINE static void Map(int i,
+ DType* out,
+ const DType* data_l,
+ const DType* data_r,
+ const IType* indptr_r,
+ const CType* col_idx_r,
+ const nnvm::dim_t seg_len,
+ const nnvm::dim_t num_rows_l,
+ const nnvm::dim_t num_cols_l,
+ const nnvm::dim_t num_rows_r,
+ const nnvm::dim_t num_cols_r) {
+ using nnvm::dim_t;
+ const dim_t seg_start = i * seg_len;  // first row of this block
+ if (seg_start >= num_rows_l) return;  // block lies past the last row: nothing to do
+ const dim_t seg_end = std::min(seg_start + seg_len, num_rows_l);  // one past the last row, clamped
+ for (dim_t j = 0; j < num_rows_r; ++j) {
+ if (indptr_r[j] == indptr_r[j+1]) continue;  // csr row j stores no values
+ for (IType k = indptr_r[j]; k < indptr_r[j+1]; ++k) {
+ const CType col_idx = col_idx_r[k];
+ const DType val = data_r[k];
+ for (dim_t r = seg_start; r < seg_end; ++r) {
+ out[r*num_cols_r+col_idx] += data_l[r*num_cols_l+j] * val;  // dns2(r, col_idx) += dns1(r, j) * csr(j, col_idx)
+ }
+ }
+ }
+ }
+};
+/*!
+ * \brief CPU Kernel of dot(dns1, csr.T) = dns2
+ * Parallelization by row blocks
+ */
+struct DotDnsCsrTransDnsByRowBlocks {
+ /*!
+ * \brief
+ * \param i the i-th thread
Review comment:
Please complete the documentation: the \brief above is empty.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services