While running train_loss += loss.mean().asscalar(), I got the following error. The
loss.shape is (98304, 1).
mxnet.base.MXNetError: [16:26:40]
c:\jenkins\workspace\mxnet-tag\mxnet\3rdparty\mshadow\mshadow\./cuda/tensor_gpu-inl.cuh:58:
too large launch parameter: Softmax[98304,1], [256,1,1]
source code in tensor_gpu-inl.cuh:
/*! \brief upper bound on the grid x/y dimension accepted by CheckLaunchParam below */
const int kMaxGridDim = 65535;
/*! \brief suggested grid number for mapping kernel */
const int kBaseGridNum = 1024;
/*!
 * \brief get align stride for given size in x dimension
 * \param xsize requested size along the x dimension
 * \return xsize rounded up to a multiple of kMemUnit when large enough to
 *         benefit from aligned access; otherwise xsize unchanged
 */
inline index_t GetAlignStride(index_t xsize) {
  if (xsize >= MSHADOW_MIN_PAD_RATIO * 32) {
    // round up to the next multiple of kMemUnit (kMemUnit == 1 << kMemUnitBits)
    return ((xsize + kMemUnit - 1) >> kMemUnitBits) << kMemUnitBits;
  } else {
    // if the original space is not aligned, no need to use aligned thread
    // allocation
    return xsize;
  }
}
/*!
 * \brief abort with a fatal log message when the requested kernel launch
 *        configuration exceeds the supported limits
 * \param dimGrid  grid dimensions of the planned launch
 * \param dimBlock block dimensions of the planned launch
 * \param estr     optional tag (e.g. kernel name) included in the message
 */
inline void CheckLaunchParam(dim3 dimGrid, dim3 dimBlock, const char *estr = "") {
  const unsigned threads_in_block = dimBlock.x * dimBlock.y * dimBlock.z;
  const bool block_too_large =
      threads_in_block > static_cast<unsigned>(kMaxThreadsPerBlock);
  const bool grid_too_large =
      dimGrid.x > kMaxGridDim || dimGrid.y > kMaxGridDim;
  if (block_too_large || grid_too_large) {
    LOG(FATAL) << "too large launch parameter: " << estr << "["
               << dimGrid.x << "," << dimGrid.y << "], ["
               << dimBlock.x << "," << dimBlock.y << "," << dimBlock.z << "]";
  }
}
[ Full content available at:
https://github.com/apache/incubator-mxnet/issues/12751 ]
This message was relayed via gitbox.apache.org for [email protected]