reminisce commented on a change in pull request #7223: GPU implementation of cast_storage (dense to rsp) URL: https://github.com/apache/incubator-mxnet/pull/7223#discussion_r130228514
########## File path: src/operator/tensor/cast_storage-inl.cuh ########## @@ -14,13 +14,288 @@ namespace mxnet { namespace op { using mshadow::cuda::kBaseThreadNum; +using mshadow::Shape1; +using mxnet_op::Kernel; -inline void CastStorageDnsRspImpl(const OpContext& ctx, const gpu& gpu_dev, const TBlob& dns, NDArray* rsp) { - LOG(FATAL) << "CastStorageDnsRspImpl gpu version is not implemented."; +/*! + * \brief Thread kernel for marking non-zero rows of a tensor. + * Parallelized by tensor rows: 1 thread/row + */ +struct MarkRspRowIdxThreadKernel { + /*! + * \brief + * \param tid global thread id + * \param row_flg row flag array to mark non-zero rows + * \param dns dense matrix data + * \param num_rows number of rows (size of first dimension of tensor) + * \param row_length number of elements per row + */ + template<typename DType, typename RType> + __device__ __forceinline__ static void Map(int tid, + RType* row_flg, + const DType* dns, + const index_t num_rows, + const index_t row_length) { Review comment: One thing to note is that `dim_t` is `int64_t`, while `size_t` is unsigned. Should we use `dim_t` instead of `size_t` to keep it consistent with the dim size type? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services