While running train_loss += loss.mean().asscalar(), I got the following error; the
loss.shape is (98304, 1).

mxnet.base.MXNetError: [16:26:40] 
c:\jenkins\workspace\mxnet-tag\mxnet\3rdparty\mshadow\mshadow\./cuda/tensor_gpu-inl.cuh:58:
 too large launch parameter: Softmax[98304,1], [256,1,1]

source code in tensor_gpu-inl.cuh:
/*! \brief maximum blocks allowed per grid dimension used by CheckLaunchParam below
 *  NOTE(review): 65535 is the hardware limit for gridDim.y/z; modern GPUs allow
 *  gridDim.x up to 2^31-1, so applying this bound to x as well may be overly strict
 *  — confirm intent */
const int kMaxGridDim = 65535;
/*! \brief suggested grid number for mapping kernel */
const int kBaseGridNum = 1024;
/*!
 * \brief Get the aligned stride for a given size in the x dimension.
 * \param xsize number of elements in the x dimension
 * \return xsize rounded up to the next multiple of kMemUnit when the size is
 *         large enough to benefit from padding; otherwise xsize unchanged
 */
inline index_t GetAlignStride(index_t xsize) {
  if (xsize >= MSHADOW_MIN_PAD_RATIO * 32) {
    // round up to a multiple of kMemUnit (kMemUnit == 1 << kMemUnitBits)
    return ((xsize + kMemUnit - 1) >> kMemUnitBits) << kMemUnitBits;
  } else {
    // if the original space is not aligned, no need to pad the thread allocation
    return xsize;
  }
}
/*!
 * \brief Validate CUDA kernel launch parameters; aborts via LOG(FATAL) when the
 *        block exceeds kMaxThreadsPerBlock threads or a grid dimension exceeds
 *        kMaxGridDim.
 * \param dimGrid grid dimensions of the launch
 * \param dimBlock block dimensions of the launch
 * \param estr label included in the error message (e.g. the kernel name)
 */
inline void CheckLaunchParam(dim3 dimGrid, dim3 dimBlock, const char *estr = "") {
  const unsigned threads_per_block = dimBlock.x * dimBlock.y * dimBlock.z;
  const bool block_too_large =
      threads_per_block > static_cast<unsigned>(kMaxThreadsPerBlock);
  const bool grid_too_large =
      dimGrid.x > kMaxGridDim || dimGrid.y > kMaxGridDim;
  if (block_too_large || grid_too_large) {
    LOG(FATAL) << "too large launch parameter: "
      << estr << "["
      << dimGrid.x << ","
      << dimGrid.y << "], ["
      << dimBlock.x << ","
      << dimBlock.y << ","
      << dimBlock.z << "]";
  }
}

[ Full content available at: 
https://github.com/apache/incubator-mxnet/issues/12751 ]
This message was relayed via gitbox.apache.org for devnull@infra.apache.org

Reply via email to