szha commented on issue #18100:
URL: https://github.com/apache/incubator-mxnet/issues/18100#issuecomment-616669873
http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/mxnet-validation%2Funix-gpu/detail/PR-18025/36/pipeline ``` =================================== FAILURES =================================== ______________________________ test_np_histogram _______________________________ @with_seed() @use_np def test_np_histogram(): shapes = [(), (3, 4), (3, 0)] for shape in shapes: mx_a = np.random.uniform(0.0, 10.0, size=shape) > np_a = mx_a.asnumpy() tests/python/unittest/test_numpy_op.py:4494: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ python/mxnet/ndarray/ndarray.py:2566: in asnumpy ctypes.c_size_t(data.size))) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ret = -1 def check_call(ret): """Check the return value of C API call. This function will raise an exception when an error occurs. Wrap every API call with this function. Parameters ---------- ret : int return value from API calls. """ if ret != 0: > raise get_last_ffi_error() E mxnet.base.MXNetError: Traceback (most recent call last): E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da] E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe] E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d] E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491] E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee] E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027] E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4] E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) [0x7f23e4aade64] E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0] E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf] E File "/work/mxnet/include/mshadow/./random.h", line 396 E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (102 vs. 0) : CURAND Gen Uniform float failed. size = 1 python/mxnet/base.py:246: MXNetError ________________________________ test_np_choice ________________________________ @with_seed() @use_np def test_np_choice(): class TestUniformChoice(HybridBlock): def __init__(self, sample_size, replace): super(TestUniformChoice, self).__init__() self.sample_size = sample_size self.replace = replace def hybrid_forward(self, F, a): return F.np.random.choice(a=a, size=self.sample_size, replace=self.replace, p=None) class TestWeightedChoice(HybridBlock): def __init__(self, sample_size, replace): super(TestWeightedChoice, self).__init__() self.sample_size = sample_size self.replace = replace def hybrid_forward(self, F, a, p): op = getattr(F.np.random, "choice", None) return F.np.random.choice(a, self.sample_size, self.replace, p) def test_sample_with_replacement(sampler, num_classes, shape, weight=None): samples = sampler(num_classes, shape, replace=True, p=weight).asnumpy() generated_density = _np.histogram(samples, _np.arange(num_classes + 1), density=True)[0] expected_density = (weight.asnumpy() if weight is not None else _np.array([1 / num_classes] * num_classes)) # test almost equal assert_almost_equal(generated_density, expected_density, rtol=1e-1, atol=1e-1) # test shape assert (samples.shape == shape) def test_sample_without_replacement(sampler, num_classes, shape, num_trials, weight=None): samples = sampler(num_classes, shape, replace=False, p=weight).asnumpy() # Check shape and uniqueness assert samples.shape == shape assert len(_np.unique(samples)) == samples.size # Check distribution bins = _np.zeros((num_classes)) expected_freq = (weight.asnumpy() if weight is not None else _np.array([1 / num_classes] * num_classes)) for i in range(num_trials): out = sampler(num_classes, 1, replace=False, p=weight).item() bins[out] += 1 bins /= 
num_trials assert_almost_equal(bins, expected_freq, rtol=1e-1, atol=1e-1) def test_indexing_mode(sampler, set_size, samples_size, replace, weight=None): a = np.arange(set_size) if weight is not None: samples = sampler(a, weight) else: samples = sampler(a) assert len(samples) == samples_size if not replace: assert len(_np.unique(samples.asnumpy())) == samples_size num_classes = 10 num_samples = 10 ** 8 # Density tests are commented out due to their huge time comsumption. # Tests passed locally. # shape_list1 = [ # (10 ** 8, 1), # (10 ** 5, 10 ** 3), # (10 ** 2, 10 ** 3, 10 ** 3) # ] # for shape in shape_list1: # test_sample_with_replacement(np.random.choice, num_classes, shape) # weight = np.array(_np.random.dirichlet([1.0] * num_classes)) # test_sample_with_replacement(np.random.choice, num_classes, shape, weight) # Tests passed locally, # commented out for the same reason as above. # shape_list2 = [ # (6, 1), # (2, 3), # (1, 2, 3), # (2, 2), # ] # for shape in shape_list2: # test_sample_without_replacement(np.random.choice, num_classes, shape, 10 ** 5) # weight = np.array(_np.random.dirichlet([1.0] * num_classes)) # test_sample_without_replacement(np.random.choice, num_classes, shape, 10 ** 5, weight) # Test hypridize mode: for wtype in ['float16', 'float32', 'float64']: for hybridize in [True, False]: for replace in [True, False]: test_choice = TestUniformChoice(num_classes // 2, replace) test_choice_weighted = TestWeightedChoice(num_classes // 2, replace) if hybridize: test_choice.hybridize() test_choice_weighted.hybridize() weight = np.array(_np.random.dirichlet([1.0] * num_classes)).astype(wtype) > test_indexing_mode(test_choice, num_classes, num_classes // 2, replace, None) tests/python/unittest/test_numpy_op.py:4598: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ tests/python/unittest/test_numpy_op.py:4559: in test_indexing_mode assert len(_np.unique(samples.asnumpy())) == samples_size python/mxnet/ndarray/ndarray.py:2566: in asnumpy ctypes.c_size_t(data.size))) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ret = -1 def check_call(ret): """Check the return value of C API call. This function will raise an exception when an error occurs. Wrap every API call with this function. Parameters ---------- ret : int return value from API calls. 
""" if ret != 0: > raise get_last_ffi_error() E mxnet.base.MXNetError: Traceback (most recent call last): E [bt] (9) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbd53f) [0x7f24564f253f] E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da] E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe] E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d] E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491] E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&)+0xba) [0x7f23de997c2a] E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027] E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4] E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyChoiceForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0x16e9) [0x7f23e47986e9] E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf] E [08:59:51] /work/mxnet/include/mshadow/./random.h:321: Check failed: (status) == (CURAND_STATUS_SUCCESS) CURAND Gen rand ints failed. python/mxnet/base.py:246: MXNetError ______________________________ test_np_full_like _______________________________ @with_seed() @use_np def test_np_full_like(): class TestFullLike(HybridBlock): def __init__(self, fill_value, dtype, ctx): super(TestFullLike, self).__init__() self._fill_value = fill_value self._dtype = dtype self._ctx = ctx def hybrid_forward(self, F, x, *args, **kwargs): return F.np.full_like(x, self._fill_value, dtype=self._dtype, ctx=self._ctx) if StrictVersion(platform.python_version()) < StrictVersion('3.0.0'): return dtypes = ['float64', 'float32', 'float16', 'int64', 'int32', 'int8', 'bool'] shapes = [ (), (1,), (4, 3), (4, 5), (2, 1), (6, 5, 6), (4, 2, 1, 2), (5, 1, 3, 3), (3, 3, 1, 0), ] # numpy.full_like operator in py2 cannot handle shape like (5, 0, 3) properly fill_values = [0, 1, 2, 3, 4, 5, 6, True, False] flags = [True, False] for fill_value, dtype, shape, hybridize in itertools.product( fill_values, dtypes, shapes, flags): param_dtype = _np.random.choice(dtypes) a = np.random.uniform(low=0, high=100, size=shape, dtype='float64').astype(dtype) test = TestFullLike(fill_value, param_dtype, npx.current_context()) > expected_ret = _np.full_like(a.asnumpy(), fill_value=fill_value, dtype=param_dtype) tests/python/unittest/test_numpy_op.py:6479: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ python/mxnet/ndarray/ndarray.py:2566: in asnumpy ctypes.c_size_t(data.size))) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ret = -1 def check_call(ret): """Check the return value of C API call. This function will raise an exception when an error occurs. Wrap every API call with this function. Parameters ---------- ret : int return value from API calls. 
""" if ret != 0: > raise get_last_ffi_error() E mxnet.base.MXNetError: Traceback (most recent call last): E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da] E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe] E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d] E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491] E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee] E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027] E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4] E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) [0x7f23e4aade64] E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0] E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf] E File "/work/mxnet/include/mshadow/./random.h", line 396 E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Uniform float failed. size = 1 python/mxnet/base.py:246: MXNetError _______________________________ test_np_diagflat _______________________________ @with_seed() @use_np def test_np_diagflat(): class TestDiagflat(HybridBlock): def __init__(self, k=0): super(TestDiagflat,self).__init__() self._k = k def hybrid_forward(self,F,a): return F.np.diagflat(a, k=self._k) shapes = [(2,),5 , (1,5), (2,2), (2,5), (3,3), (4,3),(4,4,5)] # test_shapes, remember to include zero-dim shape and zero-size shapes dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64] # remember to include all meaningful data types for the operator range_k = 6 for hybridize,shape,dtype, in itertools.product([False,True],shapes,dtypes): rtol = 1e-2 if dtype == np.float16 else 1e-3 atol = 1e-4 if dtype == np.float16 else 1e-5 for k in range(-range_k,range_k): test_diagflat = TestDiagflat(k) if hybridize: test_diagflat.hybridize() x = np.random.uniform(-1.0,1.0, size = shape).astype(dtype) x.attach_grad() > np_out = _np.diagflat(x.asnumpy(), k) tests/python/unittest/test_numpy_op.py:7464: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ python/mxnet/ndarray/ndarray.py:2566: in asnumpy ctypes.c_size_t(data.size))) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ret = -1 def check_call(ret): """Check the return value of C API call. This function will raise an exception when an error occurs. Wrap every API call with this function. Parameters ---------- ret : int return value from API calls. 
""" if ret != 0: > raise get_last_ffi_error() E mxnet.base.MXNetError: Traceback (most recent call last): E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da] E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe] E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d] E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491] E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee] E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027] E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4] E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) [0x7f23e4aade64] E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0] E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf] E File "/work/mxnet/include/mshadow/./random.h", line 396 E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Uniform float failed. size = 2 python/mxnet/base.py:246: MXNetError ________________________________ test_batchnorm ________________________________ @with_seed() def test_batchnorm(): momentum = 0.9 epsilon = 1e-5 def _test_batchnorm_impl(op, shape, axis, cudnn_off, output_mean_var): print(str((op, shape, axis, cudnn_off))) kwargs = dict(output_mean_var=output_mean_var) if op == mx.nd.contrib.SyncBatchNorm: if axis != 1: return key = str(op) + str(shape) + str(axis) kwargs.update(dict(key=key)) if cudnn_off: return else: kwargs.update(dict(axis=axis, cudnn_off=cudnn_off)) nch = shape[axis] bn_gamma = mx.nd.random.uniform(shape=(nch,)) bn_gamma.attach_grad() bn_beta = mx.nd.random.uniform(shape=(nch,)) bn_beta.attach_grad() bn_running_mean = mx.nd.zeros(nch) bn_running_var = mx.nd.ones(nch) running_mean = mx.nd.zeros(nch) running_var = mx.nd.ones(nch) num_iters = 10 expand_shape = [1] * len(shape) expand_shape[axis] = shape[axis] for _ in range(num_iters): data = mx.nd.random.uniform(shape=shape) data.attach_grad() ograd = mx.nd.random.uniform(shape=shape) with mx.autograd.record(): output = op(data, bn_gamma, bn_beta, bn_running_mean, bn_running_var, momentum=momentum, eps=epsilon, fix_gamma=False, **kwargs) if output_mean_var: output, output_mean, output_std = output output.backward(ograd) mx.nd.waitall() data_mean = data.mean( axis=axis, exclude=True, keepdims=True) data_var = (data - data_mean).square().mean(axis=axis, exclude=True, keepdims=True) target_output = (data - data_mean) / \ (data_var + epsilon).sqrt() * \ bn_gamma.reshape(expand_shape) + \ bn_beta.reshape(expand_shape) # squeeze data_mean and data_var data_mean_flat = data_mean.squeeze() data_var_flat = data_var.squeeze() running_mean = running_mean * momentum + \ data_mean_flat * (1 - momentum) running_var = running_var * momentum + \ data_var_flat * (1 - momentum) W = bn_gamma.reshape(expand_shape) dnx = ograd * W xsm = data - data_mean nd = 1.0 / mx.nd.sqrt(data_var + epsilon) nx = xsm * nd m = np.prod(shape) / shape[axis] dvar = (dnx * xsm).sum(axis=axis, keepdims=True, exclude=True) * (-0.5) * mx.nd.power(nd, 3) dmean = -nd * dnx.sum(axis=axis, keepdims=True, exclude=True) - \ dvar * xsm.mean(axis=axis, keepdims=True, exclude=True) * 2.0 dX = dnx * nd + dvar * xsm * (2.0 / m) + dmean * (1.0 / m) dW = (ograd * nx).sum(axis=axis, exclude=True) db = ograd.sum(axis=axis, exclude=True) atol = 1e-2 rtol = 1e-2 if output_mean_var: assert_almost_equal(output_mean.asnumpy(), data_mean_flat.asnumpy(), atol=atol, rtol=rtol) if op != mx.nd.contrib.SyncBatchNorm: assert_almost_equal(output_std.asnumpy(), (1.0 / (data_var_flat + epsilon).sqrt()).asnumpy(), atol=atol, rtol=rtol) else: assert_almost_equal(output_std.asnumpy(), data_var_flat.asnumpy(), atol=atol, rtol=rtol) 
assert_almost_equal(output.asnumpy(), target_output.asnumpy(), atol=atol, rtol=rtol) assert_almost_equal(bn_running_mean.asnumpy( ), running_mean.asnumpy(), atol=atol, rtol=rtol) assert_almost_equal(bn_running_var.asnumpy( ), running_var.asnumpy(), atol=atol, rtol=rtol) assert_almost_equal(data.grad.asnumpy(), dX.asnumpy(), atol=atol, rtol=rtol) assert_almost_equal( bn_gamma.grad.asnumpy(), dW.asnumpy(), atol=atol, rtol=rtol) assert_almost_equal( bn_beta.grad.asnumpy(), db.asnumpy(), atol=atol, rtol=rtol) for op in [mx.nd.BatchNorm, mx.nd.contrib.SyncBatchNorm]: for shape in [(24, 2), (24, 3, 4), (24, 4, 4, 4), (24, 8, 4, 4), (24, 5, 6, 4, 4)]: for axis in range(len(shape)): for cudnn_off in [False, True]: for output_mean_var in [False, True]: _test_batchnorm_impl(op, shape, axis, > cudnn_off, output_mean_var) tests/python/unittest/test_operator.py:1943: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ tests/python/unittest/test_operator.py:1870: in _test_batchnorm_impl mx.nd.waitall() python/mxnet/ndarray/ndarray.py:211: in waitall check_call(_LIB.MXNDArrayWaitAll()) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ret = -1 def check_call(ret): """Check the return value of C API call. This function will raise an exception when an error occurs. Wrap every API call with this function. Parameters ---------- ret : int return value from API calls. """ if ret != 0: > raise get_last_ffi_error() E mxnet.base.MXNetError: Traceback (most recent call last): E [bt] (9) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > >::_M_run()+0x4a) [0x7f23de99d4da] E [bt] (8) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (std::shared_ptr<dmlc::ManualEvent>), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#4}::operator()() const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe] E [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context, bool, mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*, std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d] E [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491] E [bt] (5) /work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee] E [bt] (4) /work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void (mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext&&)+0x17) [0x7f23dea6a027] E [bt] (3) /work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x934) [0x7f23dea69bd4] E [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void mxnet::op::NumpyNormalForward<mshadow::gpu>(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xdf8) [0x7f23e49d5708] E [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, float>::GenGaussian(float*, unsigned long, float, float)+0x15e) [0x7f23e1b271fe] E [bt] (0) /work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f) [0x7f23de7900cf] E File "/work/mxnet/include/mshadow/./random.h", line 380 E MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Normal float failed. size = 0,mu = 0,sigma = 1 python/mxnet/base.py:246: MXNetError ----------------------------- Captured stdout call ----------------------------- (<function BatchNorm at 0x7f22dab688c8>, (24, 2), 0, False) ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
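All of the failures above come out of the cuRAND calls in `mshadow/random.h` (`GenUniform`, the rand-int path used by `choice`, and `GenGaussian`): the first check fails with status 102 and every later one with 202. If I'm reading `curand.h` right, 102 is `CURAND_STATUS_ALLOCATION_FAILED` and 202 is `CURAND_STATUS_PREEXISTING_FAILURE`, so it looks like the generator on that GPU hit an allocation problem once and then stayed in a failed state for the rest of the worker's lifetime, rather than anything specific to these tests.

For anyone trying to reproduce this locally, the failing path boils down to roughly the sketch below. This is only a rough guess at a minimal repro, not taken from the CI config: it assumes a CUDA-enabled build of this branch and a free device at `gpu(0)`, and it just issues the same `np.random.uniform` + `asnumpy()` sequence that `test_np_histogram` runs, so the cuRAND status check is the first thing that can fire.

```python
# Rough repro sketch (assumptions: CUDA build of MXNet, one free GPU at gpu(0)).
import mxnet as mx
from mxnet import np, npx

npx.set_np()  # enable the NumPy-compatible semantics the failing tests use

with mx.gpu(0):  # run on GPU so mshadow::Random<gpu>::GenUniform is exercised
    a = np.random.uniform(0.0, 10.0, size=(3, 4))  # same call as in test_np_histogram
    print(a.asnumpy())  # synchronization point where the CURAND check surfaces
```

If that snippet (or `pytest tests/python/unittest/test_numpy_op.py::test_np_histogram` on the same node) fails there but passes on other GPU hosts, it points at the state of that CI slave's GPU rather than at PR-18025 itself.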
