leezu commented on pull request #18690:
URL: https://github.com/apache/incubator-mxnet/pull/18690#issuecomment-660321410


   The backtrace of the issue is below. I used the following patch to obtain it:
   
   ``` diff
   modified   src/c_api/c_api_ndarray.cc
   @@ -394,7 +394,6 @@ int MXAutogradBackwardEx(uint32_t num_output,
                             NDArrayHandle **grad_handles,
                             int **grad_stypes) {
      MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get();
   -  API_BEGIN();
    
      std::vector<NDArray*> outputs, ograds, variables;
      outputs.reserve(num_output);
   @@ -430,7 +429,7 @@ int MXAutogradBackwardEx(uint32_t num_output,
        *grad_handles = dmlc::BeginPtr(ret->ret_handles);
        *grad_stypes = dmlc::BeginPtr(ret->out_types);
      }
   -  API_END();
   +  return 0;
    }
    
    int MXAutogradGetSymbol(NDArrayHandle handle, SymbolHandle *out) {
   ```
   
   and build via `cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLOG_FATAL_THROW=0 
-DUSE_CUDA=0 ..; ninja`
   
   
   Backtrace:
   
   ```
   Thread 1 "python3.8" received signal SIGABRT, Aborted.
   __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
   51      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
   (gdb) bt
   #0  0x00007ffff705ef47 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/unix/sysv/linux/raise.c:51
   #1  0x00007ffff70608b1 in __GI_abort () at abort.c:79
   #2  0x00007fff3737c257 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #3  0x00007fff37387606 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #4  0x00007fff37387671 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #5  0x00007fff37387905 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #6  0x00007fff3737e96b in std::__throw_bad_cast() () at 
/usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #7  0x00007fff3ba064f7 in 
__gnu_cxx::new_allocator<nnvm::NodeEntry>::allocate(unsigned long, void const*) 
(this=0x555555e73ea0, __n=18446744073709459446) at 
/usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/ext/new_allocator.h:106
   #8  0x00007fff3ba064b4 in 
std::allocator_traits<std::allocator<nnvm::NodeEntry> 
>::allocate(std::allocator<nnvm::NodeEntry>&, unsigned long) (__a=..., 
__n=18446744073709459446) at 
/usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/alloc_traits.h:460
   #9  0x00007fff3ba06423 in std::_Vector_base<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> >::_M_allocate(unsigned long) 
(this=0x555555e73ea0, __n=18446744073709459446) at 
/usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/stl_vector.h:346
   #10 0x00007fff3ba060cc in std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> 
>::_M_allocate_and_copy<__gnu_cxx::__normal_iterator<nnvm::NodeEntry const*, 
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > > >(unsigned 
long, __gnu_cxx::__normal_iterator<nnvm::NodeEntry const*, 
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > >, 
__gnu_cxx::__normal_iterator<nnvm::NodeEntry const*, 
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > >) 
(this=0x555555e73ea0, __n=18446744073709459446, __first={node = <error reading 
variable: Cannot access memory at address 0xf0000000a>, index = 15, version = 
3707764736}, __last={node = <error reading variable: Cannot access memory at 
address 0x100000010>, index = 1470775728, version = 21845}) at 
/usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/stl_vector.h:1511
   #11 0x00007fff3ba04c07 in std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> >::operator=(std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> > const&) (this=0x555555e73ea0, __x=std::vector 
of length -92170, capacity 12802 = {...}) at 
/usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/vector.tcc:226
   #12 0x00007fff3bc74a44 in nnvm::Graph::operator=(nnvm::Graph const&) 
(this=0x555555e73ea0) at ../include/nnvm/graph.h:46
   #13 0x00007fff3bc60133 in mxnet::CachedOp::DynamicBackward(bool, 
mxnet::OpStatePtr const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&) (this=0x555557aa41b0, 
retain_graph=false, op_state=..., inputs=std::vector of length 3, capacity 3 = 
{...}, reqs=std::vector of length 1, capacity 1 = {...}, outputs=std::vector of 
length 1, capacity 1 = {...}) at ../src/imperative/cached_op.cc:853
   #14 0x00007fff3bc6262b in mxnet::CachedOp::Backward(bool, mxnet::OpStatePtr 
const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&) 
(this=0x555557aa41b0, retain_graph=false, state=..., inputs=std::vector of 
length 3, capacity 3 = {...}, reqs=std::vector of length 1, capacity 1 = {...}, 
outputs=std::vector of length 1, capacity 1 = {...}) at 
../src/imperative/cached_op.cc:1048
   #15 0x00007fff3bcdd29d in (anonymous 
namespace)::InvokeOperator(nnvm::IndexedGraph const&, int, bool, 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, 
mxnet::Context, std::vector<mxnet::OpStatePtr, 
std::allocator<mxnet::OpStatePtr> >*, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> >*, std::vector<unsigned int, 
std::allocator<unsigned int> >*, std::function<void (mxnet::OpStatePtr 
const&)>) (idx=..., node_idx=5, retain_graph=false, arrays=std::vector of 
length 8, capacity 8 = {...}, ctx=..., p_states=0x7ffffffccfa8, 
ndinputs=std::vector of length 3, capacity 3 = {...}, ndoutputs=std::vector of 
length 1, capacity 1 = {...}, p_req=0x7ffffffcc328, p_ref_count=0x7ffffffccfc8, 
invoke=...) at ../src/imperative/imperative_utils.cc:91
   #16 0x00007fff3bcdcaca in mxnet::imperative::RunGraph(bool, 
nnvm::IndexedGraph const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, unsigned long, unsigned long, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> >&&, 
std::vector<unsigned int, std::allocator<unsigned int> >&&, 
std::vector<mxnet::OpStatePtr, std::allocator<mxnet::OpStatePtr> >*, 
std::vector<mxnet::DispatchMode, std::allocator<mxnet::DispatchMode> > const&, 
bool, std::vector<mxnet::TShape, std::allocator<mxnet::TShape> >*, 
std::function<void (char const*, char const*, void*)> const&, bool) 
(retain_graph=false, idx=..., arrays=std::vector of length 8, capacity 8 = 
{...}, node_start=4, node_end=7, array_reqs=..., ref_count=..., 
p_states=0x7ffffffccfa8, dispatch_modes=std::vector of length 7, capacity 7 = 
{...}, recording=false, shapes=0x0, callback=..., monitor_all=false)
       at ../src/imperative/imperative_utils.cc:165
   #17 0x00007fff3bcbe53c in 
mxnet::Imperative::Backward(std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, bool, bool, bool) 
(this=0x7fff501e8e78 <mxnet::Imperative::Get()::inst>, outputs=std::vector of 
length 1, capacity 1 = {...}, ograds=std::vector of length 1, capacity 1 = 
{...}, variables=std::vector of length 0, capacity 0, is_train=true, 
retain_graph=false, create_graph=false) at ../src/imperative/imperative.cc:616
   #18 0x00007fff3bab5a3f in MXAutogradBackwardEx(uint32_t, NDArrayHandle*, 
NDArrayHandle*, uint32_t, NDArrayHandle*, int, int, int, NDArrayHandle**, 
int**) (num_output=1, output_handles=0x7ffda003bbe0, 
ograd_handles=0x7ffda003bd20, num_variables=0, var_handles=0x0, retain_graph=0, 
create_graph=0, is_train=1, grad_handles=0x0, grad_stypes=0x0) at 
../src/c_api/c_api_ndarray.cc:418
   #19 0x00007ffff2f65dae in ffi_call_unix64 () at 
/usr/lib/x86_64-linux-gnu/libffi.so.6
   #20 0x00007ffff2f6571f in ffi_call () at 
/usr/lib/x86_64-linux-gnu/libffi.so.6
   #21 0x00007ffff317d415 in _call_function_pointer (flags=4353, 
pProc=0x7fff3bab57c0 <MXAutogradBackwardEx(uint32_t, NDArrayHandle*, 
NDArrayHandle*, uint32_t, NDArrayHandle*, int, int, int, NDArrayHandle**, 
int**)>, avalues=0x7ffffffcd9c0, atypes=0x7ffffffcd960, restype=0x7ffff35bb9f8, 
resmem=0x7ffffffcda20, argcount=10) at 
/tmp/python-build.20200514035455.63369/Python-3.8.2/Modules/_ctypes/callproc.c:871
   #22 0x00007ffff317de19 in _ctypes_callproc (pProc=0x7fff3bab57c0 
<MXAutogradBackwardEx(uint32_t, NDArrayHandle*, NDArrayHandle*, uint32_t, 
NDArrayHandle*, int, int, int, NDArrayHandle**, int**)>, argtuple=(1, 
<c_void_p_Array_1 at remote 0x7ffda003bb90>, <c_void_p_Array_1 at remote 
0x7ffda003bcd0>, 0, <c_void_p at remote 0x7ffda003be10>, <c_int at remote 
0x7ffda003beb0>, <c_int at remote 0x7ffda003bf50>, <c_int at remote 
0x7ffdc19de050>, <c_void_p at remote 0x7ffdc19de0f0>, <c_void_p at remote 
0x7ffdc19de190>), flags=4353, argtypes=0x0, restype=<_ctypes.PyCSimpleType at 
remote 0x555555e7bce0>, checker=0x0) at 
/tmp/python-build.20200514035455.63369/Python-3.8.2/Modules/_ctypes/callproc.c:1199
   #23 0x00007ffff3178169 in PyCFuncPtr_call (self=0x7ffda004cc90, inargs=(1, 
<c_void_p_Array_1 at remote 0x7ffda003bb90>, <c_void_p_Array_1 at remote 
0x7ffda003bcd0>, 0, <c_void_p at remote 0x7ffda003be10>, <c_int at remote 
0x7ffda003beb0>, <c_int at remote 0x7ffda003bf50>, <c_int at remote 
0x7ffdc19de050>, <c_void_p at remote 0x7ffdc19de0f0>, <c_void_p at remote 
0x7ffdc19de190>), kwds=0x0) at 
/tmp/python-build.20200514035455.63369/Python-3.8.2/Modules/_ctypes/_ctypes.c:4201
   ...
   ```
   
   Specifically, note the `Cannot access memory at address 0xf0000000a` in frame #10, and the source vector reported as `length -92170` in frame #11.
   
   To obtain the backtrace, run `gdb /path/to/python`, then 
`(gdb) run -m pytest --color=yes --verbose --exitfirst ./tests/python/unittest/test_dynamic_shape.py`, 
and then `bt`.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to