GraphExecutor should hold Engine::_GetSharedRef() to a class member engine_ref_, or sometimes Engine might be deconstructed before GraphExecutor was deconstructed. Note that in ~GraphExecutor(), Engine::Get()->DeleteOperator() was called.
This bug is similar to https://github.com/apache/incubator-mxnet/issues/309 , which has been fixed yet. In this bug, sometimes ObjectPool<ThreadedOpr>, ObjectPool<OprBlock>, ObjectPool<VersionedVarBlock>, might be deconstructed before ThreadedEngine was deconstructed. Why this shared_ptr bug appears everywhere? It's a general C++ problem. A simple example as follow: engine.h ` #pragma once #include <memory> class Eng { public: virtual ~Eng() = default; virtual void Print() const = 0; }; // src/engine/engine.cc class Engine : public Eng { public: Engine(); ~Engine(); static std::shared_ptr<Engine> _GetSharedRef(); static Engine* Get(); void Print() const; private: char *name_ = nullptr; }; typedef Eng *get_t(); typedef std::shared_ptr<Engine> get_ref_t(); ` engine.cc ` #include "engine.h" #include <stdio.h> Engine::Engine() { name_ = new char[8]; snprintf(name_, 8, "HELLO"); } Engine::~Engine() { printf("BEGIN ~Engine\n"); if (name_) delete name_; printf(" END ~Engine\n"); } std::shared_ptr<Engine> Engine::_GetSharedRef() { static std::shared_ptr<Engine> sptr(new Engine()); return sptr; } Engine* Engine::Get() { static Engine *inst = _GetSharedRef().get(); return inst; } void Engine::Print() const { printf("name=%s\n", name_); } extern "C" Eng *get() { return Engine::Get(); } extern "C" std::shared_ptr<Engine> get_ref() { return Engine::_GetSharedRef(); } ` executor.h ` #pragma once #include <memory> #include "engine.h" class Executor { public: virtual ~Executor() = default; virtual void Print() = 0; }; // src/executor/graph_executor.cc class GraphExecutor : public Executor { public: GraphExecutor(void *eng); ~GraphExecutor(); static std::shared_ptr<GraphExecutor> _GetSharedRef(void *eng); static GraphExecutor* Get(void *eng); void Print(); private: void *eng_; std::shared_ptr<Engine> engine_ref_; }; typedef Executor *create_t(void *); ` executor.cc ` #include "executor.h" #include <dlfcn.h> #include <stdio.h> #include "engine.h" GraphExecutor::GraphExecutor(void *eng) { eng_ = eng; get_ref_t *get_ref = (get_ref_t *) dlsym(eng_, "get_ref"); engine_ref_ = get_ref(); // TODO: import! } GraphExecutor::~GraphExecutor() { printf("BEGIN ~GraphExecutor\n"); Print(); printf(" END ~GraphExecutor\n"); } void GraphExecutor::Print() { get_t *get = (get_t *) dlsym(eng_, "get"); Eng *eng = get(); eng->Print(); } std::shared_ptr<GraphExecutor> GraphExecutor::_GetSharedRef(void *eng) { static std::shared_ptr<GraphExecutor> sptr(new GraphExecutor(eng)); return sptr; } GraphExecutor* GraphExecutor::Get(void *eng) { static GraphExecutor *inst = _GetSharedRef(eng).get(); return inst; } extern "C" Executor *create(void *eng) { return GraphExecutor::Get(eng); } ` test.cc ` #include "engine.h" #include "executor.h" #include <dlfcn.h> #include <stdio.h> int main() { void *handle_engine = dlopen("libengine.so", RTLD_LAZY); void *handle = dlopen("libexe.so", RTLD_LAZY); create_t *create = (create_t *) dlsym(handle, "create"); Executor *executor = create(handle_engine); executor->Print(); //dlclose(handle_engine); //dlclose(handle); return 0; } ` Makefile ` all: gcc -std=c++11 -g engine.cc -fPIC -shared -o libengine.so gcc -std=c++11 -g executor.cc -fPIC -shared -o libexe.so g++ -std=c++11 -g test.cc -rdynamic -ldl -o b.out ` make ./b.out You will run well. But if there is no "engine_ref_ = get_ref();" in GraphExecutor::GraphExecutor(void *eng), it will coredump. This is the same problem as the bug. [ Full content available at: https://github.com/apache/incubator-mxnet/issues/12613 ] This message was relayed via gitbox.apache.org for [email protected]
