szha commented on issue #18100:
URL: https://github.com/apache/incubator-mxnet/issues/18100#issuecomment-616669873

   http://jenkins.mxnet-ci.amazon-ml.com/blue/organizations/jenkins/mxnet-validation%2Funix-gpu/detail/PR-18025/36/pipeline

   ```
    =================================== FAILURES ===================================
    ______________________________ test_np_histogram _______________________________
   
       @with_seed()
       @use_np
       def test_np_histogram():
           shapes = [(), (3, 4), (3, 0)]
       
           for shape in shapes:
               mx_a = np.random.uniform(0.0, 10.0, size=shape)
   >           np_a = mx_a.asnumpy()
   
   tests/python/unittest/test_numpy_op.py:4494: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   python/mxnet/ndarray/ndarray.py:2566: in asnumpy
       ctypes.c_size_t(data.size)))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   ret = -1
   
       def check_call(ret):
           """Check the return value of C API call.
       
           This function will raise an exception when an error occurs.
           Wrap every API call with this function.
       
           Parameters
           ----------
           ret : int
               return value from API calls.
           """
           if ret != 0:
   >           raise get_last_ffi_error()
   E           mxnet.base.MXNetError: Traceback (most recent call last):
   E             [bt] (9) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void
 (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > 
>::_M_run()+0x4a) [0x7f23de99d4da]
   E             [bt] (8) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(std::shared_ptr<dmlc::ManualEvent>), 
mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, 
bool)::{lambda()#4}::operator()() 
const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data
 const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
   E             [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context,
 bool, 
mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*,
 std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
   E             [bt] (6) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext,
 mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
   E             [bt] (5) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
   E             [bt] (4) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
   E             [bt] (3) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
 (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) 
const+0x934) [0x7f23dea69bd4]
   E             [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) 
[0x7f23e4aade64]
   E             [bt] (1) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, 
float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0]
   E             [bt] (0) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f)
 [0x7f23de7900cf]
   E             File "/work/mxnet/include/mshadow/./random.h", line 396
    E           MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (102 vs. 0) : CURAND Gen Uniform float failed. size = 1
   
   python/mxnet/base.py:246: MXNetError
    ________________________________ test_np_choice ________________________________
   
       @with_seed()
       @use_np
       def test_np_choice():
           class TestUniformChoice(HybridBlock):
               def __init__(self, sample_size, replace):
                   super(TestUniformChoice, self).__init__()
                   self.sample_size = sample_size
                   self.replace = replace
       
               def hybrid_forward(self, F, a):
                    return F.np.random.choice(a=a, size=self.sample_size, replace=self.replace, p=None)
       
           class TestWeightedChoice(HybridBlock):
               def __init__(self, sample_size, replace):
                   super(TestWeightedChoice, self).__init__()
                   self.sample_size = sample_size
                   self.replace = replace
       
               def hybrid_forward(self, F, a, p):
                   op = getattr(F.np.random, "choice", None)
                    return F.np.random.choice(a, self.sample_size, self.replace, p)
       
            def test_sample_with_replacement(sampler, num_classes, shape, weight=None):
                samples = sampler(num_classes, shape, replace=True, p=weight).asnumpy()
                generated_density = _np.histogram(samples, _np.arange(num_classes + 1), density=True)[0]
               expected_density = (weight.asnumpy() if weight is not None else
                                   _np.array([1 / num_classes] * num_classes))
               # test almost equal
                assert_almost_equal(generated_density, expected_density, rtol=1e-1, atol=1e-1)
               # test shape
               assert (samples.shape == shape)
       
            def test_sample_without_replacement(sampler, num_classes, shape, num_trials, weight=None):
                samples = sampler(num_classes, shape, replace=False, p=weight).asnumpy()
               # Check shape and uniqueness
               assert samples.shape == shape
               assert len(_np.unique(samples)) == samples.size
               # Check distribution
               bins = _np.zeros((num_classes))
               expected_freq = (weight.asnumpy() if weight is not None else
                                _np.array([1 / num_classes] * num_classes))
               for i in range(num_trials):
                   out = sampler(num_classes, 1, replace=False, p=weight).item()
                   bins[out] += 1
               bins /= num_trials
               assert_almost_equal(bins, expected_freq, rtol=1e-1, atol=1e-1)
       
            def test_indexing_mode(sampler, set_size, samples_size, replace, weight=None):
               a = np.arange(set_size)
               if weight is not None:
                   samples = sampler(a, weight)
               else:
                   samples = sampler(a)
               assert len(samples) == samples_size
               if not replace:
                   assert len(_np.unique(samples.asnumpy())) == samples_size
       
           num_classes = 10
           num_samples = 10 ** 8
           # Density tests are commented out due to their huge time comsumption.
           # Tests passed locally.
           # shape_list1 = [
           #     (10 ** 8, 1),
           #     (10 ** 5, 10 ** 3),
           #     (10 ** 2, 10 ** 3, 10 ** 3)
           # ]
           # for shape in shape_list1:
            #     test_sample_with_replacement(np.random.choice, num_classes, shape)
           #     weight = np.array(_np.random.dirichlet([1.0] * num_classes))
            #     test_sample_with_replacement(np.random.choice, num_classes, shape, weight)
       
           # Tests passed locally,
           # commented out for the same reason as above.
           # shape_list2 = [
           #     (6, 1),
           #     (2, 3),
           #     (1, 2, 3),
           #     (2, 2),
           # ]
           # for shape in shape_list2:
            #     test_sample_without_replacement(np.random.choice, num_classes, shape, 10 ** 5)
           #     weight = np.array(_np.random.dirichlet([1.0] * num_classes))
            #     test_sample_without_replacement(np.random.choice, num_classes, shape, 10 ** 5, weight)
       
           # Test hypridize mode:
           for wtype in ['float16', 'float32', 'float64']:
               for hybridize in [True, False]:
                   for replace in [True, False]:
                        test_choice = TestUniformChoice(num_classes // 2, replace)
                        test_choice_weighted = TestWeightedChoice(num_classes // 2, replace)
                       if hybridize:
                           test_choice.hybridize()
                           test_choice_weighted.hybridize()
                        weight = np.array(_np.random.dirichlet([1.0] * num_classes)).astype(wtype)
    >                   test_indexing_mode(test_choice, num_classes, num_classes // 2, replace, None)
   
   tests/python/unittest/test_numpy_op.py:4598: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   tests/python/unittest/test_numpy_op.py:4559: in test_indexing_mode
       assert len(_np.unique(samples.asnumpy())) == samples_size
   python/mxnet/ndarray/ndarray.py:2566: in asnumpy
       ctypes.c_size_t(data.size)))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   ret = -1
   
       def check_call(ret):
           """Check the return value of C API call.
       
           This function will raise an exception when an error occurs.
           Wrap every API call with this function.
       
           Parameters
           ----------
           ret : int
               return value from API calls.
           """
           if ret != 0:
   >           raise get_last_ffi_error()
   E           mxnet.base.MXNetError: Traceback (most recent call last):
   E             [bt] (9) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbd53f) 
[0x7f24564f253f]
   E             [bt] (8) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void
 (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > 
>::_M_run()+0x4a) [0x7f23de99d4da]
   E             [bt] (7) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(std::shared_ptr<dmlc::ManualEvent>), 
mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, 
bool)::{lambda()#4}::operator()() 
const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data
 const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
   E             [bt] (6) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context,
 bool, 
mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*,
 std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
   E             [bt] (5) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext,
 mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
   E             [bt] (4) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext, mxnet::engine::CallbackOnComplete), 
mxnet::engine::ThreadedEngine::BulkFlush()::{lambda(mxnet::RunContext, 
mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&, mxnet::engine::CallbackOnComplete&&)+0xba) [0x7f23de997c2a]
   E             [bt] (3) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
   E             [bt] (2) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
 (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) 
const+0x934) [0x7f23dea69bd4]
   E             [bt] (1) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyChoiceForward<mshadow::gpu>(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > 
const&)+0x16e9) [0x7f23e47986e9]
   E             [bt] (0) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f)
 [0x7f23de7900cf]
    E           [08:59:51] /work/mxnet/include/mshadow/./random.h:321: Check failed: (status) == (CURAND_STATUS_SUCCESS) CURAND Gen rand ints failed.
   
   python/mxnet/base.py:246: MXNetError
    ______________________________ test_np_full_like _______________________________
   
       @with_seed()
       @use_np
       def test_np_full_like():
           class TestFullLike(HybridBlock):
               def __init__(self, fill_value, dtype, ctx):
                   super(TestFullLike, self).__init__()
                   self._fill_value = fill_value
                   self._dtype = dtype
                   self._ctx = ctx
       
               def hybrid_forward(self, F, x, *args, **kwargs):
                    return F.np.full_like(x, self._fill_value, dtype=self._dtype, ctx=self._ctx)
       
           if StrictVersion(platform.python_version()) < StrictVersion('3.0.0'):
               return
       
            dtypes = ['float64', 'float32', 'float16', 'int64', 'int32', 'int8', 'bool']
           shapes = [
               (),
               (1,),
               (4, 3),
               (4, 5),
               (2, 1),
               (6, 5, 6),
               (4, 2, 1, 2),
               (5, 1, 3, 3),
               (3, 3, 1, 0),
           ]
            # numpy.full_like operator in py2 cannot handle shape like (5, 0, 3) properly
           fill_values = [0, 1, 2, 3, 4, 5, 6, True, False]
           flags = [True, False]
           for fill_value, dtype, shape, hybridize in itertools.product(
               fill_values, dtypes, shapes, flags):
               param_dtype = _np.random.choice(dtypes)
                a = np.random.uniform(low=0, high=100, size=shape, dtype='float64').astype(dtype)
                test = TestFullLike(fill_value, param_dtype, npx.current_context())
    >           expected_ret = _np.full_like(a.asnumpy(), fill_value=fill_value, dtype=param_dtype)
   
   tests/python/unittest/test_numpy_op.py:6479: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   python/mxnet/ndarray/ndarray.py:2566: in asnumpy
       ctypes.c_size_t(data.size)))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   ret = -1
   
       def check_call(ret):
           """Check the return value of C API call.
       
           This function will raise an exception when an error occurs.
           Wrap every API call with this function.
       
           Parameters
           ----------
           ret : int
               return value from API calls.
           """
           if ret != 0:
   >           raise get_last_ffi_error()
   E           mxnet.base.MXNetError: Traceback (most recent call last):
   E             [bt] (9) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void
 (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > 
>::_M_run()+0x4a) [0x7f23de99d4da]
   E             [bt] (8) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(std::shared_ptr<dmlc::ManualEvent>), 
mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, 
bool)::{lambda()#4}::operator()() 
const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data
 const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
   E             [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context,
 bool, 
mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*,
 std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
   E             [bt] (6) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext,
 mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
   E             [bt] (5) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
   E             [bt] (4) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
   E             [bt] (3) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
 (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) 
const+0x934) [0x7f23dea69bd4]
   E             [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) 
[0x7f23e4aade64]
   E             [bt] (1) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, 
float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0]
   E             [bt] (0) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f)
 [0x7f23de7900cf]
   E             File "/work/mxnet/include/mshadow/./random.h", line 396
    E           MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Uniform float failed. size = 1
   
   python/mxnet/base.py:246: MXNetError
    _______________________________ test_np_diagflat _______________________________
   
       @with_seed()
       @use_np
       def test_np_diagflat():
           class TestDiagflat(HybridBlock):
               def __init__(self, k=0):
                   super(TestDiagflat,self).__init__()
                   self._k = k
               def hybrid_forward(self,F,a):
                   return F.np.diagflat(a, k=self._k)
            shapes = [(2,),5 , (1,5), (2,2), (2,5), (3,3), (4,3),(4,4,5)] # test_shapes, remember to include zero-dim shape and zero-size shapes
            dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64] # remember to include all meaningful data types for the operator
           range_k = 6
            for hybridize,shape,dtype, in itertools.product([False,True],shapes,dtypes):
               rtol = 1e-2 if dtype == np.float16 else 1e-3
               atol = 1e-4 if dtype == np.float16 else 1e-5
       
               for k in range(-range_k,range_k):
                   test_diagflat = TestDiagflat(k)
                   if hybridize:
                       test_diagflat.hybridize()
       
                   x = np.random.uniform(-1.0,1.0, size = shape).astype(dtype)
                   x.attach_grad()
       
   >               np_out = _np.diagflat(x.asnumpy(), k)
   
   tests/python/unittest/test_numpy_op.py:7464: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   python/mxnet/ndarray/ndarray.py:2566: in asnumpy
       ctypes.c_size_t(data.size)))
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   ret = -1
   
       def check_call(ret):
           """Check the return value of C API call.
       
           This function will raise an exception when an error occurs.
           Wrap every API call with this function.
       
           Parameters
           ----------
           ret : int
               return value from API calls.
           """
           if ret != 0:
   >           raise get_last_ffi_error()
   E           mxnet.base.MXNetError: Traceback (most recent call last):
   E             [bt] (9) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void
 (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > 
>::_M_run()+0x4a) [0x7f23de99d4da]
   E             [bt] (8) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(std::shared_ptr<dmlc::ManualEvent>), 
mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, 
bool)::{lambda()#4}::operator()() 
const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data
 const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
   E             [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context,
 bool, 
mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*,
 std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
   E             [bt] (6) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext,
 mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
   E             [bt] (5) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
   E             [bt] (4) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
   E             [bt] (3) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
 (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) 
const+0x934) [0x7f23dea69bd4]
   E             [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyUniformForward<mshadow::gpu>(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xd34) 
[0x7f23e4aade64]
   E             [bt] (1) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, 
float>::GenUniform(float*, unsigned long)+0x100) [0x7f23e1b26ea0]
   E             [bt] (0) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f)
 [0x7f23de7900cf]
   E             File "/work/mxnet/include/mshadow/./random.h", line 396
    E           MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Uniform float failed. size = 2
   
   python/mxnet/base.py:246: MXNetError
    ________________________________ test_batchnorm ________________________________
   
       @with_seed()
       def test_batchnorm():
           momentum = 0.9
           epsilon = 1e-5
       
            def _test_batchnorm_impl(op, shape, axis, cudnn_off, output_mean_var):
               print(str((op, shape, axis, cudnn_off)))
       
               kwargs = dict(output_mean_var=output_mean_var)
               if op == mx.nd.contrib.SyncBatchNorm:
                   if axis != 1:
                       return
                   key = str(op) + str(shape) + str(axis)
                   kwargs.update(dict(key=key))
                   if cudnn_off:
                       return
               else:
                   kwargs.update(dict(axis=axis, cudnn_off=cudnn_off))
               nch = shape[axis]
       
               bn_gamma = mx.nd.random.uniform(shape=(nch,))
               bn_gamma.attach_grad()
       
               bn_beta = mx.nd.random.uniform(shape=(nch,))
               bn_beta.attach_grad()
       
               bn_running_mean = mx.nd.zeros(nch)
               bn_running_var = mx.nd.ones(nch)
       
               running_mean = mx.nd.zeros(nch)
               running_var = mx.nd.ones(nch)
               num_iters = 10
               expand_shape = [1] * len(shape)
               expand_shape[axis] = shape[axis]
               for _ in range(num_iters):
                   data = mx.nd.random.uniform(shape=shape)
                   data.attach_grad()
                   ograd = mx.nd.random.uniform(shape=shape)
                   with mx.autograd.record():
                       output = op(data, bn_gamma, bn_beta,
                                   bn_running_mean, bn_running_var,
                                   momentum=momentum, eps=epsilon,
                                   fix_gamma=False, **kwargs)
                       if output_mean_var:
                           output, output_mean, output_std = output
                       output.backward(ograd)
                   mx.nd.waitall()
       
                   data_mean = data.mean(
                       axis=axis, exclude=True, keepdims=True)
                   data_var = (data - data_mean).square().mean(axis=axis,
                                                               exclude=True,
                                                               keepdims=True)
       
                   target_output = (data - data_mean) / \
                       (data_var + epsilon).sqrt() * \
                       bn_gamma.reshape(expand_shape) + \
                       bn_beta.reshape(expand_shape)
       
                   # squeeze data_mean and data_var
                   data_mean_flat = data_mean.squeeze()
                   data_var_flat = data_var.squeeze()
       
                   running_mean = running_mean * momentum + \
                       data_mean_flat * (1 - momentum)
                   running_var = running_var * momentum + \
                       data_var_flat * (1 - momentum)
       
                   W = bn_gamma.reshape(expand_shape)
                   dnx = ograd * W
                   xsm = data - data_mean
                   nd = 1.0 / mx.nd.sqrt(data_var + epsilon)
                   nx = xsm * nd
                   m = np.prod(shape) / shape[axis]
                   dvar = (dnx * xsm).sum(axis=axis, keepdims=True,
                                           exclude=True) * (-0.5) * mx.nd.power(nd, 3)
                    dmean = -nd * dnx.sum(axis=axis, keepdims=True, exclude=True) - \
                       dvar * xsm.mean(axis=axis, keepdims=True,
                                       exclude=True) * 2.0
                   dX = dnx * nd + dvar * xsm * (2.0 / m) + dmean * (1.0 / m)
                   dW = (ograd * nx).sum(axis=axis, exclude=True)
                   db = ograd.sum(axis=axis, exclude=True)
       
                   atol = 1e-2
                   rtol = 1e-2
       
                   if output_mean_var:
                       assert_almost_equal(output_mean.asnumpy(),
                                           data_mean_flat.asnumpy(),
                                           atol=atol, rtol=rtol)
                       if op != mx.nd.contrib.SyncBatchNorm:
                           assert_almost_equal(output_std.asnumpy(),
                                                (1.0 / (data_var_flat +
                                                        epsilon).sqrt()).asnumpy(),
                                               atol=atol, rtol=rtol)
                       else:
                           assert_almost_equal(output_std.asnumpy(),
                                               data_var_flat.asnumpy(),
                                               atol=atol, rtol=rtol)
                    assert_almost_equal(output.asnumpy(), target_output.asnumpy(),
                                       atol=atol, rtol=rtol)
                   assert_almost_equal(bn_running_mean.asnumpy(
                   ), running_mean.asnumpy(), atol=atol, rtol=rtol)
                   assert_almost_equal(bn_running_var.asnumpy(
                   ), running_var.asnumpy(), atol=atol, rtol=rtol)
       
                   assert_almost_equal(data.grad.asnumpy(),
                                       dX.asnumpy(), atol=atol, rtol=rtol)
                   assert_almost_equal(
                        bn_gamma.grad.asnumpy(), dW.asnumpy(), atol=atol, rtol=rtol)
                   assert_almost_equal(
                        bn_beta.grad.asnumpy(), db.asnumpy(), atol=atol, rtol=rtol)
       
           for op in [mx.nd.BatchNorm, mx.nd.contrib.SyncBatchNorm]:
                for shape in [(24, 2), (24, 3, 4), (24, 4, 4, 4), (24, 8, 4, 4), (24, 5, 6, 4, 4)]:
                   for axis in range(len(shape)):
                       for cudnn_off in [False, True]:
                           for output_mean_var in [False, True]:
                               _test_batchnorm_impl(op, shape, axis,
   >                                                cudnn_off, output_mean_var)
   
   tests/python/unittest/test_operator.py:1943: 
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   tests/python/unittest/test_operator.py:1870: in _test_batchnorm_impl
       mx.nd.waitall()
   python/mxnet/ndarray/ndarray.py:211: in waitall
       check_call(_LIB.MXNDArrayWaitAll())
    _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
   
   ret = -1
   
       def check_call(ret):
           """Check the return value of C API call.
       
           This function will raise an exception when an error occurs.
           Wrap every API call with this function.
       
           Parameters
           ----------
           ret : int
               return value from API calls.
           """
           if ret != 0:
   >           raise get_last_ffi_error()
   E           mxnet.base.MXNetError: Traceback (most recent call last):
   E             [bt] (9) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::function<void
 (std::shared_ptr<dmlc::ManualEvent>)>, std::shared_ptr<dmlc::ManualEvent> > > 
>::_M_run()+0x4a) [0x7f23de99d4da]
   E             [bt] (8) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(std::shared_ptr<dmlc::ManualEvent>), 
mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, 
bool)::{lambda()#4}::operator()() 
const::{lambda(std::shared_ptr<dmlc::ManualEvent>)#1}>::_M_invoke(std::_Any_data
 const&, std::shared_ptr<dmlc::ManualEvent>&&)+0x4e) [0x7f23de9a17fe]
   E             [bt] (7) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::engine::ThreadedEnginePerDevice::GPUWorker<(dmlc::ConcurrentQueueType)0>(mxnet::Context,
 bool, 
mxnet::engine::ThreadedEnginePerDevice::ThreadWorkerBlock<(dmlc::ConcurrentQueueType)0>*,
 std::shared_ptr<dmlc::ManualEvent> const&)+0x11d) [0x7f23de9a151d]
   E             [bt] (6) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext,
 mxnet::engine::OprBlock*)+0x121) [0x7f23de99e491]
   E             [bt] (5) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(+0x20828ee) [0x7f23de9938ee]
   E             [bt] (4) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(std::_Function_handler<void 
(mxnet::RunContext), mxnet::imperative::PushFCompute(std::function<void 
(nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, 
mxnet::RunContext&&)+0x17) [0x7f23dea6a027]
   E             [bt] (3) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void
 (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, 
std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, 
nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, 
std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, 
std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, 
std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > 
const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) 
const+0x934) [0x7f23dea69bd4]
   E             [bt] (2) /work/mxnet/python/mxnet/../../build/libmxnet.so(void 
mxnet::op::NumpyNormalForward<mshadow::gpu>(nnvm::NodeAttrs const&, 
mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> 
> const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > 
const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)+0xdf8) 
[0x7f23e49d5708]
   E             [bt] (1) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(mshadow::Random<mshadow::gpu, 
float>::GenGaussian(float*, unsigned long, float, float)+0x15e) [0x7f23e1b271fe]
   E             [bt] (0) 
/work/mxnet/python/mxnet/../../build/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x7f)
 [0x7f23de7900cf]
   E             File "/work/mxnet/include/mshadow/./random.h", line 380
    E           MXNetError: Check failed: status == CURAND_STATUS_SUCCESS (202 vs. 0) : CURAND Gen Normal float failed. size = 0,mu = 0,sigma = 1
   
   python/mxnet/base.py:246: MXNetError
    ----------------------------- Captured stdout call -----------------------------
   (<function BatchNorm at 0x7f22dab688c8>, (24, 2), 0, False)
   ```
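
For reference, all five failures surface the same underlying cuRAND check failure (`status == CURAND_STATUS_SUCCESS` failing with 102/202) from GPU random-number generation, and the error only appears once `asnumpy()` or `waitall()` synchronizes with the GPU. The snippet below is a minimal sketch of that call pattern, not code taken from the test suite; the explicit `ctx=mx.gpu(0)` and the shape are illustrative assumptions.

```python
# Minimal sketch (illustrative only): the failing frames originate in GPU
# cuRAND generation inside mxnet.numpy random ops; because execution is
# asynchronous, the MXNetError is raised at the synchronization point.
import mxnet as mx
from mxnet import np, npx

npx.set_np()                  # NumPy-compatible semantics, as the tests' @use_np decorator does

ctx = mx.gpu(0)               # assumption: single-GPU context, as in the unix-gpu CI job
a = np.random.uniform(0.0, 10.0, size=(3, 4), ctx=ctx)  # queues a cuRAND GenUniform call
out = a.asnumpy()             # the CURAND "Gen Uniform float failed" error surfaces here
print(out)
```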

