mseth10 commented on issue #18823:
URL: 
https://github.com/apache/incubator-mxnet/issues/18823#issuecomment-671154040


   And this is the stack trace when `static_alloc` is set to false. I used the following patch to obtain it:
   ```
   diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc
   index ebb3134ae..e0ba2791e 100644
   --- a/src/c_api/c_api_ndarray.cc
   +++ b/src/c_api/c_api_ndarray.cc
   @@ -394,7 +394,6 @@ int MXAutogradBackwardEx(uint32_t num_output,
                             NDArrayHandle **grad_handles,
                             int **grad_stypes) {
      MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get();
   -  API_BEGIN();
    
      std::vector<NDArray*> outputs, ograds, variables;
      outputs.reserve(num_output);
   @@ -430,7 +429,7 @@ int MXAutogradBackwardEx(uint32_t num_output,
        *grad_handles = dmlc::BeginPtr(ret->ret_handles);
        *grad_stypes = dmlc::BeginPtr(ret->out_types);
      }
   -  API_END();
   +  return 0;
    }
    
    int MXAutogradGetSymbol(NDArrayHandle handle, SymbolHandle *out) {
   diff --git a/src/operator/subgraph/default_subgraph_property.cc 
b/src/operator/subgraph/default_subgraph_property.cc
   index ff51b6397..4228c6521 100644
   --- a/src/operator/subgraph/default_subgraph_property.cc
   +++ b/src/operator/subgraph/default_subgraph_property.cc
   @@ -64,7 +64,7 @@ class DefaultSubgraphProperty: public SubgraphProperty {
        n->attrs.name = "_CachedOp" + std::to_string(subgraph_id);
        n->attrs.subgraphs.push_back(std::make_shared<nnvm::Symbol>(sym));
    
   -    std::vector<std::pair<std::string, std::string> > 
flags{{"static_alloc", "true"}};
   +    std::vector<std::pair<std::string, std::string> > flags{};
        n->attrs.parsed = std::make_shared<CachedOp>(sym, flags);
    
        return n;
   ```
   and built via `cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLOG_FATAL_THROW=0 -DUSE_CUDA=0 ..; ninja`. The resulting backtrace:
   ```
   Thread 1 "python3" received signal SIGABRT, Aborted.
   __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
   51   ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
   (gdb) bt
   #0  0x00007ffff7a22f47 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/unix/sysv/linux/raise.c:51
   #1  0x00007ffff7a248b1 in __GI_abort () at abort.c:79
   #2  0x00007fffe93a6957 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #3  0x00007fffe93acae6 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #4  0x00007fffe93acb21 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #5  0x00007fffe93acd54 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #6  0x00007fffe93d5012 in  () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
   #7  0x00007fff42e355ac in 
__gnu_cxx::new_allocator<nnvm::NodeEntry>::allocate(unsigned long, void const*) 
(this=0x23893a0, __n=12297829382473034410) at 
/usr/include/c++/7/ext/new_allocator.h:102
   #8  0x00007fff42e33c4c in 
std::allocator_traits<std::allocator<nnvm::NodeEntry> 
>::allocate(std::allocator<nnvm::NodeEntry>&, unsigned long) (__a=..., 
__n=12297829382473034410) at /usr/include/c++/7/bits/alloc_traits.h:436
   #9  0x00007fff42e3162e in std::_Vector_base<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> >::_M_allocate(unsigned long) (this=0x23893a0, 
__n=12297829382473034410) at /usr/include/c++/7/bits/stl_vector.h:172
   #10 0x00007fff42e2ec8e in std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> 
>::_M_allocate_and_copy<__gnu_cxx::__normal_iterator<nnvm::NodeEntry const*, 
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > > >(unsigned 
long, __gnu_cxx::__normal_iterator<nnvm::NodeEntry const*, 
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > >, 
__gnu_cxx::__normal_iterator<nnvm::NodeEntry const*, 
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > >) 
(this=0x23893a0, __n=12297829382473034410, __first={node = <error reading 
variable: Cannot access memory at address 0xf0000000a>, index = 15, version = 
0}, __last=
     {node = <error reading variable: Cannot access memory at address 
0x100000010>, index = 1362897120, version = 32767}) at 
/usr/include/c++/7/bits/stl_vector.h:1260
   #11 0x00007fff42e2c50e in std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> >::operator=(std::vector<nnvm::NodeEntry, 
std::allocator<nnvm::NodeEntry> > const&) (this=0x23893a0, __x=std::vector of 
length 0, capacity -41698 = {...}) at /usr/include/c++/7/bits/vector.tcc:206
   #12 0x00007fff431c3b79 in nnvm::Graph::operator=(nnvm::Graph const&) 
(this=0x23893a0) at ../include/nnvm/graph.h:46
   #13 0x00007fff431b3c7e in mxnet::CachedOp::DynamicBackward(bool, 
mxnet::OpStatePtr const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::OpReqType, 
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&) (this=0x1eeca90, retain_graph=false, 
op_state=..., inputs=std::vector of length 3, capacity 3 = {...}, 
reqs=std::vector of length 1, capacity 1 = {...}, outputs=std::vector of length 
1, capacity 1 = {...}) at ../src/imperative/cached_op.cc:853
   #14 0x00007fff431b5a00 in mxnet::CachedOp::Backward(bool, mxnet::OpStatePtr 
const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&) 
(this=0x1eeca90, retain_graph=false, state=..., inputs=std::vector of length 3, 
capacity 3 = {...}, reqs=std::vector of length 1, capacity 1 = {...}, 
outputs=std::vector of length 1, capacity 1 = {...}) at 
../src/imperative/cached_op.cc:1048
   #15 0x00007fff43219394 in (anonymous namespace)::InvokeOperator(const 
nnvm::IndexedGraph &, int, bool, const std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > &, mxnet::Context, 
std::vector<mxnet::OpStatePtr, std::allocator<mxnet::OpStatePtr> > *, const 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > &, const 
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > &, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > *, 
std::vector<unsigned int, std::allocator<unsigned int> > *, 
std::function<void(const mxnet::OpStatePtr&)>) (idx=..., node_idx=3, 
retain_graph=false, arrays=std::vector of length 4, capacity 4 = {...}, 
ctx=..., p_states=0x7fffffff63a0, ndinputs=std::vector of length 3, capacity 3 
= {...}, ndoutputs=std::vector of length 1, capacity 1 = {...}, 
p_req=0x7fffffff5f90, p_ref_count=0x7fffffff6380, invoke=...) at 
../src/imperative/imperative_utils.cc:91
   #16 0x00007fff43219faf in mxnet::imperative::RunGraph(bool, 
nnvm::IndexedGraph const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, unsigned long, unsigned long, 
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> >&&, 
std::vector<unsigned int, std::allocator<unsigned int> >&&, 
std::vector<mxnet::OpStatePtr, std::allocator<mxnet::OpStatePtr> >*, 
std::vector<mxnet::DispatchMode, std::allocator<mxnet::DispatchMode> > const&, 
bool, std::vector<mxnet::TShape, std::allocator<mxnet::TShape> >*, 
std::function<void (char const*, char const*, void*)> const&, bool) 
(retain_graph=false, idx=..., arrays=std::vector of length 4, capacity 4 = 
{...}, node_start=2, node_end=4, array_reqs=..., ref_count=..., 
p_states=0x7fffffff63a0, dispatch_modes=std::vector of length 4, capacity 4 = 
{...}, recording=false, shapes=0x0, callback=..., monitor_all=false)
       at ../src/imperative/imperative_utils.cc:165
   #17 0x00007fff431fcaa9 in 
mxnet::Imperative::Backward(std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, 
std::allocator<mxnet::NDArray*> > const&, bool, bool, bool) 
(this=0x7fff51515ac0 <mxnet::Imperative::Get()::inst>, outputs=std::vector of 
length 1, capacity 1 = {...}, ograds=std::vector of length 1, capacity 1 = 
{...}, variables=std::vector of length 0, capacity 0, is_train=true, 
retain_graph=false, create_graph=false) at ../src/imperative/imperative.cc:616
   #18 0x00007fff43062242 in MXAutogradBackwardEx(uint32_t, NDArrayHandle*, 
NDArrayHandle*, uint32_t, NDArrayHandle*, int, int, int, NDArrayHandle**, 
int**) (num_output=1, output_handles=0x7fff90f87de0, 
ograd_handles=0x7fff90f87c48, num_variables=0, var_handles=0x0, retain_graph=0, 
create_graph=0, is_train=1, grad_handles=0x0, grad_stypes=0x0) at 
../src/c_api/c_api_ndarray.cc:419
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to