Zha0q1 commented on pull request #19067:
URL: https://github.com/apache/incubator-mxnet/pull/19067#issuecomment-723395187
```c++
template<
typename xpu,
typename IndexT,
std::enable_if_t<!std::is_same<IndexT, lapack_index_t>::value, int> = 0>
inline void convert_to_int_if_needed(
Stream<xpu> *s,
const Tensor<xpu, 2, IndexT>& tensor) {
}
// convertion to int is required only for GPU when IndexT is equal
lapack_index_t (int64_t)
template<
typename xpu,
typename IndexT,
std::enable_if_t<std::is_same<IndexT, lapack_index_t>::value, int> = 0>
inline void convert_to_int_if_needed(
Stream<xpu> *s,
const Tensor<xpu, 2, IndexT>& tensor) {
#ifdef __CUDACC__
CHECK_LE(tensor.shape_[0], std::numeric_limits<int>::max())
<< "Tensor has size greater than supported.";
CHECK_LE(tensor.shape_[1], std::numeric_limits<int>::max())
<< "Tensor has size greater than supported.";
cudaStream_t stream = Stream<xpu>::GetStream(s);
size_t elements = tensor.shape_.Size();
std::vector<IndexT> vec(elements, 0);
IndexT* ptr = vec.data();
int* ptr_int = reinterpret_cast<int*>(vec.data());
CUDA_CALL(cudaMemcpyAsync(ptr, reinterpret_cast<IndexT*>(tensor.dptr_),
tensor.MSize() * sizeof(IndexT),
cudaMemcpyDeviceToHost, stream));
for (IndexT i = 0; i < elements; ++i) {
ptr_int[i] = static_cast<int>(ptr[i]);
}
CUDA_CALL(cudaMemcpyAsync(tensor.dptr_, ptr,
tensor.MSize() * sizeof(IndexT),
cudaMemcpyHostToDevice, stream));
#endif
}
```
This might be the cause of the windows build issue if I were to take a guess.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]