This is an automated email from the ASF dual-hosted git repository.

kellen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e2f752  Reference engine from chunk via weak pointer (#14591)
3e2f752 is described below

commit 3e2f752d493b3819aa7ac60c58a7b6caa346e13a
Author: Kellen Sunderland <[email protected]>
AuthorDate: Tue Apr 16 20:45:52 2019 -0700

    Reference engine from chunk via weak pointer (#14591)
---
 include/mxnet/ndarray.h                  | 22 +++++++++++------
 src/ndarray/ndarray.cc                   | 24 ++++++++++---------
 tests/cpp/engine/engine_shutdown_test.cc | 41 ++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 13fb42c..05d3fa4 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -850,15 +850,20 @@ class NDArray {
     mxnet::ShapeVector aux_shapes;
     /*! \brief Reference to the storage to ensure proper destruct order */
     std::shared_ptr<Storage> storage_ref_;
+    /*! \brief Reference to the engine to ensure we cleanup without calling a 
destructed engine */
+    std::weak_ptr<Engine> engine_ref_;
 
-    /*! \brief default cosntructor */
+
+    /*! \brief default constructor */
     Chunk() : static_data(true), delay_alloc(false),
-              storage_ref_(Storage::_GetSharedRef()) {}
+              storage_ref_(Storage::_GetSharedRef()),
+              engine_ref_(Engine::_GetSharedRef()) {}
 
     /*! \brief construct a new chunk */
     Chunk(mxnet::TShape shape, Context ctx_, bool delay_alloc_, int dtype)
         : static_data(false), delay_alloc(true), ctx(ctx_),
-          storage_ref_(Storage::_GetSharedRef()) {
+          storage_ref_(Storage::_GetSharedRef()),
+          engine_ref_(Engine::_GetSharedRef()) {
       storage_shape = shape;
       if (shape_is_known(storage_shape)) {
         shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
@@ -872,7 +877,8 @@ class NDArray {
 
     Chunk(const TBlob &data, int dev_id)
         : static_data(true), delay_alloc(false),
-          storage_ref_(Storage::_GetSharedRef()) {
+          storage_ref_(Storage::_GetSharedRef()),
+          engine_ref_(Engine::_GetSharedRef()) {
       CHECK(storage_type == kDefaultStorage);
       var = Engine::Get()->NewVariable();
       if (data.dev_mask() == cpu::kDevMask) {
@@ -890,7 +896,8 @@ class NDArray {
 
     Chunk(int shared_pid, int shared_id, const mxnet::TShape& shape, int dtype)
         : static_data(false), delay_alloc(false),
-          storage_ref_(Storage::_GetSharedRef()) {
+          storage_ref_(Storage::_GetSharedRef()),
+          engine_ref_(Engine::_GetSharedRef()) {
       var = Engine::Get()->NewVariable();
       ctx = Context::CPUShared(0);
       shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
@@ -906,7 +913,8 @@ class NDArray {
           const mxnet::ShapeVector &aux_shapes_)
         : static_data(false), delay_alloc(delay_alloc_), 
storage_type(storage_type_),
           aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_),
-          aux_shapes(aux_shapes_), storage_ref_(Storage::_GetSharedRef()) {
+          aux_shapes(aux_shapes_), storage_ref_(Storage::_GetSharedRef()),
+          engine_ref_(Engine::_GetSharedRef()) {
       shandle.ctx = ctx;
       var = Engine::Get()->NewVariable();
       // aux_handles always reflect the correct number of aux data
@@ -924,7 +932,7 @@ class NDArray {
     Chunk(const NDArrayStorageType storage_type_, const TBlob &data,
           const std::vector<TBlob> &aux_data, int dev_id)
         : static_data(true), delay_alloc(false), storage_type(storage_type_),
-          storage_ref_(Storage::_GetSharedRef()) {
+          storage_ref_(Storage::_GetSharedRef()), 
engine_ref_(Engine::_GetSharedRef()) {
       using namespace mshadow;
       CHECK_NE(storage_type, kDefaultStorage);
       // init var
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index f5aac36..eddfbcf 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -113,20 +113,22 @@ NDArray::Chunk::~Chunk() {
   // We want to delete mkldnn memory after deleting the variable.
   mem.mem = this->mkl_mem_;
 #endif
-  Engine::Get()->DeleteVariable([mem, skip_free](RunContext s) {
-    if (skip_free == false) {
+  if (auto engine = engine_ref_.lock()) {
+    engine->DeleteVariable([mem, skip_free](RunContext s) {
+      if (skip_free == false) {
 #if MXNET_USE_MKLDNN == 1
-      if (mem.mem) {
-        CHECK_LE(mem.mem->GetSize(), mem.h.size);
-        CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
-      }
+        if (mem.mem) {
+          CHECK_LE(mem.mem->GetSize(), mem.h.size);
+          CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
+        }
 #endif
-      Storage::Get()->Free(mem.h);
-      for (const auto& aux : mem.aux_h) {
-        Storage::Get()->Free(aux);
+        Storage::Get()->Free(mem.h);
+        for (const auto &aux : mem.aux_h) {
+          Storage::Get()->Free(aux);
+        }
       }
-    }
-  }, shandle.ctx, var);
+    }, shandle.ctx, var);
+  }
 }
 
 void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) {
diff --git a/tests/cpp/engine/engine_shutdown_test.cc 
b/tests/cpp/engine/engine_shutdown_test.cc
new file mode 100644
index 0000000..893d085
--- /dev/null
+++ b/tests/cpp/engine/engine_shutdown_test.cc
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file engine_shutdown_test.cc
+ * \brief Tests engine shutdown for possible crashes
+*/
+#include <gtest/gtest.h>
+
+#include "../src/engine/engine_impl.h"
+#include "../include/test_util.h"
+
+/**
+ * This test will help ensure we don't crash during engine shutdown.
+ * The crash happens during a static destructor call, so this test may pass 
and then cause a test-run process crash.
+ */
+TEST(EngineShutdown, stop_without_crashing) {
+    static std::unique_ptr<mxnet::NDArray> ndArray;
+    {
+        auto engine = mxnet::Engine::_GetSharedRef();
+        ndArray = std::make_unique<mxnet::NDArray>(mxnet::Context::CPU());
+        engine->Stop();
+    }
+}

Reply via email to