The test `test_gluon_gpu.test_slice_batchnorm` is failing almost consistently on CI. Three consecutive runs on master failed:
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1696/pipeline/
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1697/pipeline/
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/incubator-mxnet/detail/master/1698/pipeline/
```
======================================================================
ERROR: test_gluon_gpu.test_slice_batchnorm
----------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/nose/case.py", line 198, in
runTest
self.test(*self.arg)
File "/usr/local/lib/python3.5/dist-packages/nose/util.py", line 620, in
newfunc
return func(*arg, **kw)
File "/work/mxnet/tests/python/gpu/../unittest/common.py", line 172, in
test_new
orig_test(*args, **kwargs)
File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1954, in
test_slice_batchnorm
check_layer_forward_withinput(net, x)
File "/work/mxnet/tests/python/gpu/../unittest/test_gluon.py", line 1508, in
check_layer_forward_withinput
mx.test_utils.assert_almost_equal(x.grad.asnumpy(),
x_hybrid.grad.asnumpy(), rtol=1e-5, atol=1e-6)
File "/work/mxnet/python/mxnet/ndarray/ndarray.py", line 1980, in asnumpy
ctypes.c_size_t(data.size)))
File "/work/mxnet/python/mxnet/base.py", line 253, in check_call
raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [21:40:19]
src/operator/nn/./cudnn/cudnn_convolution-inl.h:870: Failed to find any forward
convolution algorithm. with workspace size of 1073741824 bytes, please
consider reducing batch/model size or increasing the workspace size
Stack trace returned 10 entries:
[bt] (0)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::StackTrace[abi:cxx11]()+0x1c7)
[0x7f2a6dd0a9e7]
[bt] (1)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32)
[0x7f2a6dd0ae92]
[bt] (2)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::SelectAlgo(mxnet::RunContext
const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&,
std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&,
cudnnDataType_t, cudnnDataType_t)+0xea6) [0x7f2a740b2856]
[bt] (3)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::op::CuDNNConvolutionOp<float>::Init(mxnet::op::ConvolutionParam
const&, int, int, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> >
const&, std::vector<nnvm::TShape, std::allocator<nnvm::TShape> > const&,
mxnet::RunContext const&, bool)+0xac3) [0x7f2a740b5fb3]
[bt] (4) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x805b3b3)
[0x7f2a7405d3b3]
[bt] (5) /work/mxnet/python/mxnet/../../lib/libmxnet.so(void
mxnet::op::ConvolutionCompute<mshadow::gpu>(nnvm::NodeAttrs const&,
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob>
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> >
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xfac)
[0x7f2a7405e64c]
[bt] (6)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob,
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType,
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob,
std::allocator<mxnet::TBlob> > const&), void (*)(nnvm::NodeAttrs const&,
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob>
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> >
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> >
const&)>::_M_invoke(std::_Any_data const&, nnvm::NodeAttrs const&,
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob>
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> >
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x20)
[0x7f2a6dd051c0]
[bt] (7)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob,
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType,
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob,
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*,
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*,
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*,
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource,
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int,
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType,
std::allocator<mxnet::OpReqType> >
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext)
const+0x2cd) [0x7f2a70fb603d]
[bt] (8)
/work/mxnet/python/mxnet/../../lib/libmxnet.so(std::_Function_handler<void
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob,
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType,
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob,
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*,
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*,
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*,
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource,
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int,
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType,
std::allocator<mxnet::OpReqType> > const&)::
{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&,
mxnet::RunContext&&)+0x1e) [0x7f2a70fb638e]
[bt] (9) /work/mxnet/python/mxnet/../../lib/libmxnet.so(+0x576686b)
[0x7f2a7176886b]
-------------------- >> begin captured logging << --------------------
common: INFO: Setting test np/mx/python random seeds, use
MXNET_TEST_SEED=560052399 to reproduce.
--------------------- >> end captured logging << ---------------------
```
[ Full content available at:
https://github.com/apache/incubator-mxnet/issues/12715 ]
This message was relayed via gitbox.apache.org for [email protected]