szha closed pull request #8804: Fix weird hang bug due to cuInit sometimes calls fork
URL: https://github.com/apache/incubator-mxnet/pull/8804
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index c01de75384..28bc92f7b2 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -55,7 +55,6 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
 #ifndef _WIN32
     pthread_atfork(
       []() {
-        Engine::Get()->WaitForAll();
         Engine::Get()->Stop();
       },
       []() {
@@ -71,10 +70,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
 #endif
   }
   ~ThreadedEnginePerDevice() noexcept(false) {
-    this->Stop();
+    this->StopNoWait();
   }
 
-  void Stop() override {
+  void StopNoWait() {
     SignalQueuesForKill();
     gpu_normal_workers_.Clear();
     gpu_copy_workers_.Clear();
@@ -82,7 +81,14 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
     cpu_priority_worker_.reset(nullptr);
   }
 
+  void Stop() override {
+    if (is_worker_) return;
+    WaitForAll();
+    StopNoWait();
+  }
+
   void Start() override {
+    if (is_worker_) return;
     gpu_worker_nthreads_ = common::GetNumThreadPerGPU();
     cpu_worker_nthreads_ = dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1);
     // create CPU task
@@ -196,6 +202,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
     ~ThreadWorkerBlock() noexcept(false) {}
   };
 
+  /*! \brief whether this is a worker thread. */
+  static MX_THREAD_LOCAL bool is_worker_;
   /*! \brief number of concurrent thread cpu worker uses */
   int cpu_worker_nthreads_;
   /*! \brief number of concurrent thread each gpu worker uses */
@@ -219,6 +227,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
                         bool is_copy_worker,
                         ThreadWorkerBlock<type> *block,
                         std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
+    this->is_worker_ = true;
 #if MXNET_USE_CUDA
     mshadow::Stream<gpu> *stream;
     do {
@@ -251,6 +260,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
   template<dmlc::ConcurrentQueueType type>
   inline void CPUWorker(Context ctx,
                         ThreadWorkerBlock<type> *block) {
+    this->is_worker_ = true;
     auto* task_queue = &(block->task_queue);
     RunContext run_ctx{ctx, nullptr};
     // execute task
@@ -303,5 +313,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
 Engine *CreateThreadedEnginePerDevice() {
   return new ThreadedEnginePerDevice();
 }
+
+MX_THREAD_LOCAL bool ThreadedEnginePerDevice::is_worker_ = false;
+
 } // namespace engine
 } // namespace mxnet

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services