reminisce commented on a change in pull request #7223: GPU implementation of cast_storage (dense to rsp)
URL: https://github.com/apache/incubator-mxnet/pull/7223#discussion_r130228514
 
 

 ##########
 File path: src/operator/tensor/cast_storage-inl.cuh
 ##########
 @@ -14,13 +14,288 @@
 namespace mxnet {
 namespace op {
 using mshadow::cuda::kBaseThreadNum;
+using mshadow::Shape1;
+using mxnet_op::Kernel;
 
 -inline void CastStorageDnsRspImpl(const OpContext& ctx, const gpu& gpu_dev, const TBlob& dns, NDArray* rsp) {
-  LOG(FATAL) << "CastStorageDnsRspImpl gpu version is not implemented.";
+/*!
+ * \brief Thread kernel for marking non-zero rows of a tensor.
+ * Parallelized by tensor rows: 1 thread/row
+ */
+struct MarkRspRowIdxThreadKernel {
+  /*!
+   * \brief
+   * \param tid         global thread id
+   * \param row_flg     row flag array to mark non-zero rows
+   * \param dns         dense matrix data
+   * \param num_rows    number of rows (size of first dimension of tensor)
+   * \param row_length  number of elements per row
+   */
+  template<typename DType, typename RType>
+  __device__ __forceinline__ static void Map(int tid,
+                                             RType* row_flg,
+                                             const DType* dns,
+                                             const index_t num_rows,
+                                             const index_t row_length) {
 
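 The quoted hunk is truncated at the start of the Map body. As a minimal sketch of what such a body might look like, assuming each thread scans its own row and sets the row flag on the first non-zero element (an illustration only, not the exact code in the pull request):
 
     if (tid < num_rows) {
       // Each thread scans one row of the dense matrix.
       const index_t offset = tid * row_length;
       row_flg[tid] = 0;
       for (index_t j = 0; j < row_length; ++j) {
         if (dns[offset + j] != 0) {
           // Mark the row as non-zero and stop scanning.
           row_flg[tid] = 1;
           break;
         }
       }
     }
 
 The choice of loop index type in such a body is where the `size_t` vs `dim_t` question in the review comment below applies.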
 Review comment:
   One thing to note is that `dim_t` is `int64_t`, while `size_t` is unsigned. Should we use `dim_t` instead of `size_t` to keep it consistent with the dim size type?
 