GraphExecutor should store the result of Engine::_GetSharedRef() in a class 
member, engine_ref_; otherwise Engine may sometimes be destroyed before 
GraphExecutor is. Note that ~GraphExecutor() calls 
Engine::Get()->DeleteOperator(), so it needs the Engine to still be alive.

This bug is similar to https://github.com/apache/incubator-mxnet/issues/309 , 
which has already been fixed. In that bug, ObjectPool<ThreadedOpr>, 
ObjectPool<OprBlock>, and ObjectPool<VersionedVarBlock> could sometimes be 
destroyed before ThreadedEngine was.

Why does this shared_ptr bug appear everywhere? It is a general C++ problem. 
A simple example follows:

engine.h
`
#pragma once

#include <memory>

// Abstract engine interface. In this example it is the type handed across the
// shared-library boundary by the extern "C" get() entry point.
class Eng {
 public:
  // Virtual so that destroying through an Eng* runs the derived destructor.
  virtual ~Eng() = default;

  // Implemented by the concrete engine; takes no arguments, returns nothing.
  virtual void Print() const = 0;
};

// src/engine/engine.cc
class Engine : public Eng {
 public:
  Engine();
  ~Engine();

  static std::shared_ptr<Engine> _GetSharedRef();
  static Engine* Get();

  void Print() const;

 private:
  char *name_ = nullptr;
};

// Function types of the extern "C" entry points "get" and "get_ref" that are
// resolved at run time with dlsym().
using get_t = Eng *();
using get_ref_t = std::shared_ptr<Engine>();
`

engine.cc
`
#include "engine.h"

#include <stdio.h>

// Allocates the 8-byte name buffer and fills it with the string "HELLO".
Engine::Engine() : name_(new char[8]) {
  snprintf(name_, 8, "HELLO");
}

// Releases the name buffer. The BEGIN/END trace lines make the destruction
// order visible in the program output.
Engine::~Engine() {
  printf("BEGIN ~Engine\n");
  // name_ was obtained with new char[8], so it must be released with
  // delete[]; scalar delete on an array allocation is undefined behavior.
  // delete[] on a null pointer is a no-op, so no guard is needed.
  delete[] name_;
  printf("  END ~Engine\n");
}

std::shared_ptr<Engine> Engine::_GetSharedRef() {
  static std::shared_ptr<Engine> sptr(new Engine());
  return sptr;
}

// Returns a raw pointer to the singleton. The pointer is looked up once via
// _GetSharedRef() and cached; it does not own the Engine, so it does not keep
// the instance alive on its own.
Engine* Engine::Get() {
  static Engine *const cached = _GetSharedRef().get();
  return cached;
}

// Writes "name=<name_>" followed by a newline to stdout.
void Engine::Print() const {
  fprintf(stdout, "name=%s\n", name_);
}

// Unmangled entry point, resolvable with dlsym(handle, "get"). Returns the
// non-owning singleton pointer as its Eng base.
extern "C" Eng *get() {
  Eng *engine = Engine::Get();
  return engine;
}

extern "C" std::shared_ptr<Engine> get_ref() {
  return Engine::_GetSharedRef();
}
`

executor.h
`
#pragma once

#include <memory>

#include "engine.h"

// Abstract executor interface. In this example it is the type returned across
// the shared-library boundary by the extern "C" create() entry point.
class Executor {
 public:
  // Virtual so that destroying through an Executor* runs the derived
  // destructor.
  virtual ~Executor() = default;

  // Implemented by the concrete executor; takes no arguments, returns nothing.
  virtual void Print() = 0;
};

// src/executor/graph_executor.cc
class GraphExecutor : public Executor {
 public:
  GraphExecutor(void *eng);
  ~GraphExecutor();

  static std::shared_ptr<GraphExecutor> _GetSharedRef(void *eng);
  static GraphExecutor* Get(void *eng);

  void Print();

 private:
  void *eng_;
  std::shared_ptr<Engine> engine_ref_;
};

// Function type of the extern "C" entry point "create", resolved with dlsym().
using create_t = Executor *(void *);
`

executor.cc
`
#include "executor.h"

#include <dlfcn.h>
#include <stdio.h>

#include "engine.h"

// Stores the engine library handle and takes shared ownership of the Engine
// singleton.
//
// eng: handle passed to dlsym() to resolve the engine's "get_ref" entry point.
//
// If "get_ref" cannot be resolved, a diagnostic is written to stderr and
// engine_ref_ is left empty instead of calling through a null pointer.
GraphExecutor::GraphExecutor(void *eng) : eng_(eng) {
  get_ref_t *get_ref = reinterpret_cast<get_ref_t *>(dlsym(eng_, "get_ref"));
  if (get_ref == nullptr) {
    const char *reason = dlerror();
    fprintf(stderr, "GraphExecutor: cannot resolve \"get_ref\": %s\n",
            reason != nullptr ? reason : "unknown error");
    return;
  }

  // Important: holding this reference is what keeps the Engine alive until
  // after ~GraphExecutor() has run. Without it the Engine can be destroyed
  // first and the destructor would use a dead object.
  engine_ref_ = get_ref();
}

// Calls Print() between BEGIN/END trace lines. Print() goes through the
// Engine, so the Engine must still be alive when this destructor runs.
GraphExecutor::~GraphExecutor() {
  fputs("BEGIN ~GraphExecutor\n", stdout);
  Print();
  fputs("  END ~GraphExecutor\n", stdout);
}

void GraphExecutor::Print() {
  get_t *get = (get_t *) dlsym(eng_, "get");
  Eng *eng = get();
  eng->Print();
}

std::shared_ptr<GraphExecutor> GraphExecutor::_GetSharedRef(void *eng) {
  static std::shared_ptr<GraphExecutor> sptr(new GraphExecutor(eng));
  return sptr;
}

// Returns a raw, non-owning pointer to the singleton. The pointer is obtained
// once via _GetSharedRef(eng) and cached, so only the first call's eng is
// used.
GraphExecutor* GraphExecutor::Get(void *eng) {
  static GraphExecutor *const cached = _GetSharedRef(eng).get();
  return cached;
}

// Unmangled entry point, resolvable with dlsym(handle, "create"). Returns the
// non-owning GraphExecutor singleton pointer as its Executor base.
extern "C" Executor *create(void *eng) {
  Executor *executor = GraphExecutor::Get(eng);
  return executor;
}
`

test.cc
`
#include "engine.h"
#include "executor.h"

#include <dlfcn.h>
#include <stdio.h>

int main() {
  void *handle_engine = dlopen("libengine.so", RTLD_LAZY);
  void *handle = dlopen("libexe.so", RTLD_LAZY);

  create_t *create = (create_t *) dlsym(handle, "create");
  Executor *executor = create(handle_engine);
  executor->Print();

  //dlclose(handle_engine);
  //dlclose(handle);
  return 0;
}
`

Makefile
`
# Build both shared libraries and the test driver. g++ is used for every
# target: the sources are C++, and the gcc driver does not link the C++
# standard library. Recipe lines must begin with a tab character.
all:
	g++ -std=c++11 -g engine.cc -fPIC -shared -o libengine.so
	g++ -std=c++11 -g executor.cc -fPIC -shared -o libexe.so
	g++ -std=c++11 -g test.cc -rdynamic -ldl -o b.out
`

make
./b.out

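The program runs correctly. But if "engine_ref_ = get_ref();" is removed from 
GraphExecutor::GraphExecutor(void *eng), it will core dump, because the Engine 
singleton is then destroyed before ~GraphExecutor() runs and tries to use it. 
This is the same problem as the bug.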

[ Full content available at: 
https://github.com/apache/incubator-mxnet/issues/12613 ]
This message was relayed via gitbox.apache.org for [email protected]

Reply via email to