mseth10 commented on issue #18823:
URL:
https://github.com/apache/incubator-mxnet/issues/18823#issuecomment-671154040
And this is the stack trace when static_alloc is set to false. I used the
following patch to obtain it:
```
diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc
index ebb3134ae..e0ba2791e 100644
--- a/src/c_api/c_api_ndarray.cc
+++ b/src/c_api/c_api_ndarray.cc
@@ -394,7 +394,6 @@ int MXAutogradBackwardEx(uint32_t num_output,
NDArrayHandle **grad_handles,
int **grad_stypes) {
MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get();
- API_BEGIN();
std::vector<NDArray*> outputs, ograds, variables;
outputs.reserve(num_output);
@@ -430,7 +429,7 @@ int MXAutogradBackwardEx(uint32_t num_output,
*grad_handles = dmlc::BeginPtr(ret->ret_handles);
*grad_stypes = dmlc::BeginPtr(ret->out_types);
}
- API_END();
+ return 0;
}
int MXAutogradGetSymbol(NDArrayHandle handle, SymbolHandle *out) {
diff --git a/src/operator/subgraph/default_subgraph_property.cc b/src/operator/subgraph/default_subgraph_property.cc
index ff51b6397..4228c6521 100644
--- a/src/operator/subgraph/default_subgraph_property.cc
+++ b/src/operator/subgraph/default_subgraph_property.cc
@@ -64,7 +64,7 @@ class DefaultSubgraphProperty: public SubgraphProperty {
n->attrs.name = "_CachedOp" + std::to_string(subgraph_id);
n->attrs.subgraphs.push_back(std::make_shared<nnvm::Symbol>(sym));
- std::vector<std::pair<std::string, std::string> > flags{{"static_alloc", "true"}};
+ std::vector<std::pair<std::string, std::string> > flags{};
n->attrs.parsed = std::make_shared<CachedOp>(sym, flags);
return n;
```
and built via `cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DLOG_FATAL_THROW=0 -DUSE_CUDA=0 ..; ninja`
```
Thread 1 "python3" received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 0x00007ffff7a22f47 in __GI_raise (sig=sig@entry=6) at
../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff7a248b1 in __GI_abort () at abort.c:79
#2 0x00007fffe93a6957 in () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#3 0x00007fffe93acae6 in () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#4 0x00007fffe93acb21 in () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#5 0x00007fffe93acd54 in () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#6 0x00007fffe93d5012 in () at /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7 0x00007fff42e355ac in
__gnu_cxx::new_allocator<nnvm::NodeEntry>::allocate(unsigned long, void const*)
(this=0x23893a0, __n=12297829382473034410) at
/usr/include/c++/7/ext/new_allocator.h:102
#8 0x00007fff42e33c4c in
std::allocator_traits<std::allocator<nnvm::NodeEntry>
>::allocate(std::allocator<nnvm::NodeEntry>&, unsigned long) (__a=...,
__n=12297829382473034410) at /usr/include/c++/7/bits/alloc_traits.h:436
#9 0x00007fff42e3162e in std::_Vector_base<nnvm::NodeEntry,
std::allocator<nnvm::NodeEntry> >::_M_allocate(unsigned long) (this=0x23893a0,
__n=12297829382473034410) at /usr/include/c++/7/bits/stl_vector.h:172
#10 0x00007fff42e2ec8e in std::vector<nnvm::NodeEntry,
std::allocator<nnvm::NodeEntry>
>::_M_allocate_and_copy<__gnu_cxx::__normal_iterator<nnvm::NodeEntry const*,
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > > >(unsigned
long, __gnu_cxx::__normal_iterator<nnvm::NodeEntry const*,
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > >,
__gnu_cxx::__normal_iterator<nnvm::NodeEntry const*,
std::vector<nnvm::NodeEntry, std::allocator<nnvm::NodeEntry> > >)
(this=0x23893a0, __n=12297829382473034410, __first={node = <error reading
variable: Cannot access memory at address 0xf0000000a>, index = 15, version =
0}, __last=
{node = <error reading variable: Cannot access memory at address
0x100000010>, index = 1362897120, version = 32767}) at
/usr/include/c++/7/bits/stl_vector.h:1260
#11 0x00007fff42e2c50e in std::vector<nnvm::NodeEntry,
std::allocator<nnvm::NodeEntry> >::operator=(std::vector<nnvm::NodeEntry,
std::allocator<nnvm::NodeEntry> > const&) (this=0x23893a0, __x=std::vector of
length 0, capacity -41698 = {...}) at /usr/include/c++/7/bits/vector.tcc:206
#12 0x00007fff431c3b79 in nnvm::Graph::operator=(nnvm::Graph const&)
(this=0x23893a0) at ../include/nnvm/graph.h:46
#13 0x00007fff431b3c7e in mxnet::CachedOp::DynamicBackward(bool,
mxnet::OpStatePtr const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::OpReqType,
std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&) (this=0x1eeca90, retain_graph=false,
op_state=..., inputs=std::vector of length 3, capacity 3 = {...},
reqs=std::vector of length 1, capacity 1 = {...}, outputs=std::vector of length
1, capacity 1 = {...}) at ../src/imperative/cached_op.cc:853
#14 0x00007fff431b5a00 in mxnet::CachedOp::Backward(bool, mxnet::OpStatePtr
const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&,
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&,
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&)
(this=0x1eeca90, retain_graph=false, state=..., inputs=std::vector of length 3,
capacity 3 = {...}, reqs=std::vector of length 1, capacity 1 = {...},
outputs=std::vector of length 1, capacity 1 = {...}) at
../src/imperative/cached_op.cc:1048
#15 0x00007fff43219394 in (anonymous namespace)::InvokeOperator(const
nnvm::IndexedGraph &, int, bool, const std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > &, mxnet::Context,
std::vector<mxnet::OpStatePtr, std::allocator<mxnet::OpStatePtr> > *, const
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > &, const
std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > &,
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > *,
std::vector<unsigned int, std::allocator<unsigned int> > *,
std::function<void(const mxnet::OpStatePtr&)>) (idx=..., node_idx=3,
retain_graph=false, arrays=std::vector of length 4, capacity 4 = {...},
ctx=..., p_states=0x7fffffff63a0, ndinputs=std::vector of length 3, capacity 3
= {...}, ndoutputs=std::vector of length 1, capacity 1 = {...},
p_req=0x7fffffff5f90, p_ref_count=0x7fffffff6380, invoke=...) at
../src/imperative/imperative_utils.cc:91
#16 0x00007fff43219faf in mxnet::imperative::RunGraph(bool,
nnvm::IndexedGraph const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, unsigned long, unsigned long,
std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> >&&,
std::vector<unsigned int, std::allocator<unsigned int> >&&,
std::vector<mxnet::OpStatePtr, std::allocator<mxnet::OpStatePtr> >*,
std::vector<mxnet::DispatchMode, std::allocator<mxnet::DispatchMode> > const&,
bool, std::vector<mxnet::TShape, std::allocator<mxnet::TShape> >*,
std::function<void (char const*, char const*, void*)> const&, bool)
(retain_graph=false, idx=..., arrays=std::vector of length 4, capacity 4 =
{...}, node_start=2, node_end=4, array_reqs=..., ref_count=...,
p_states=0x7fffffff63a0, dispatch_modes=std::vector of length 4, capacity 4 =
{...}, recording=false, shapes=0x0, callback=..., monitor_all=false)
at ../src/imperative/imperative_utils.cc:165
#17 0x00007fff431fcaa9 in
mxnet::Imperative::Backward(std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*,
std::allocator<mxnet::NDArray*> > const&, bool, bool, bool)
(this=0x7fff51515ac0 <mxnet::Imperative::Get()::inst>, outputs=std::vector of
length 1, capacity 1 = {...}, ograds=std::vector of length 1, capacity 1 =
{...}, variables=std::vector of length 0, capacity 0, is_train=true,
retain_graph=false, create_graph=false) at ../src/imperative/imperative.cc:616
#18 0x00007fff43062242 in MXAutogradBackwardEx(uint32_t, NDArrayHandle*,
NDArrayHandle*, uint32_t, NDArrayHandle*, int, int, int, NDArrayHandle**,
int**) (num_output=1, output_handles=0x7fff90f87de0,
ograd_handles=0x7fff90f87c48, num_variables=0, var_handles=0x0, retain_graph=0,
create_graph=0, is_train=1, grad_handles=0x0, grad_stypes=0x0) at
../src/c_api/c_api_ndarray.cc:419
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]