The test `test_gluon_gpu.test_slice_batchnorm` is failing almost consistently: the three consecutive CI runs on master linked below all failed.

http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1696/pipeline/
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1697/pipeline/
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1698/pipeline/
```
======================================================================

ERROR: test_gluon_gpu.test_slice_batchnorm

----------------------------------------------------------------------

Traceback (most recent call last):

  File "/usr/local/lib/python3.5/dist-packages/nose/case.py", line 198, in 
runTest

    self.test(*self.arg)

  File "/usr/local/lib/python3.5/dist-packages/nose/util.py", line 620, in 
newfunc

    return func(*arg, **kw)

  File "/work/mxnet/tests/python/gpu/../unittest/common.py", line 172, in 
test_new

    orig_test(*args, **kwargs)

  File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1954, in 
test_slice_batchnorm

    check_layer_forward_withinput(net, x)

  File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1508, in 
check_layer_forward_withinput

    mx.test_utils.assert_almost_equal(x.grad.asnumpy(), 
x_hybrid.grad.asnumpy(), rtol=1e-5, atol=1e-6)

  File "/work/mxnet/python/mxnet/ndarray/ndarray.py", line 1980, in asnumpy

    ctypes.c_size_t(data.size)))

  File "/work/mxnet/python/mxnet/base.py", line 253, in check_call

    raise MXNetError(py_str(_LIB.MXGetLastError()))

mxnet.base.MXNetError: [21:40:19] 
src/operator/nn/./cudnn/cudnn_convolution-inl.h:870: Failed to find any forward 
convolution algorithm.  with workspace size of 1073741824 bytes, please 
consider reducing batch/model size or increasing the workspace size



Stack trace returned 10 entries:

[bt] (0) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x1c7)
 [0x7f2a6dd0a9e7]

[bt] (1) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32)
 [0x7f2a6dd0ae92]

[bt] (2) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::SelectAlgo(mxnet::RunContext
 const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, 
std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, 
cudnnDataType_t, cudnnDataType_t)+0xea6) [0x7f2a740b2856]

[bt] (3) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::Init(mxnet::op::ConvolutionParam
 const&, int, int, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > 
const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&, 
mxnet::RunContext const&, bool)+0xac3) [0x7f2a740b5fb3]

[bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x805b3b3) 
[0x7f2a7405d3b3]

[bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void 
mxnet::op::ConvolutionCompute<mshadow::gpu>(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xfac) 
[0x7f2a7405e64c]

[bt] (6) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&), void (*)(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > 
const&)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x20) 
[0x7f2a6dd051c0]

[bt] (7) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
 (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x2cd)
 [0x7f2a70fb603d]

[bt] (8) 
/work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&)+0x1e) [0x7f2a70fb638e]

[bt] (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x576686b) 
[0x7f2a7176886b]





-------------------- >> begin captured logging << --------------------

common: INFO: Setting test np/mx/python random seeds, use 
MXNET_TEST_SEED=560052399 to reproduce.

--------------------- >> end captured logging << ---------------------
```

[ Full content available at: 
https://github.com/apache/incubator-mxnet/issues/12715 ]
This message was relayed via gitbox.apache.org for [email protected]

Reply via email to