This is an automated email from the ASF dual-hosted git repository.
kellen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 3e2f752 Reference engine from chunk via weak pointer (#14591)
3e2f752 is described below
commit 3e2f752d493b3819aa7ac60c58a7b6caa346e13a
Author: Kellen Sunderland <[email protected]>
AuthorDate: Tue Apr 16 20:45:52 2019 -0700
Reference engine from chunk via weak pointer (#14591)
---
include/mxnet/ndarray.h | 22 +++++++++++------
src/ndarray/ndarray.cc | 24 ++++++++++---------
tests/cpp/engine/engine_shutdown_test.cc | 41 ++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 18 deletions(-)
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 13fb42c..05d3fa4 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -850,15 +850,20 @@ class NDArray {
mxnet::ShapeVector aux_shapes;
/*! \brief Reference to the storage to ensure proper destruct order */
std::shared_ptr<Storage> storage_ref_;
+ /*! \brief Reference to the engine to ensure we cleanup without calling a
destructed engine */
+ std::weak_ptr<Engine> engine_ref_;
- /*! \brief default cosntructor */
+
+ /*! \brief default constructor */
Chunk() : static_data(true), delay_alloc(false),
- storage_ref_(Storage::_GetSharedRef()) {}
+ storage_ref_(Storage::_GetSharedRef()),
+ engine_ref_(Engine::_GetSharedRef()) {}
/*! \brief construct a new chunk */
Chunk(mxnet::TShape shape, Context ctx_, bool delay_alloc_, int dtype)
: static_data(false), delay_alloc(true), ctx(ctx_),
- storage_ref_(Storage::_GetSharedRef()) {
+ storage_ref_(Storage::_GetSharedRef()),
+ engine_ref_(Engine::_GetSharedRef()) {
storage_shape = shape;
if (shape_is_known(storage_shape)) {
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
@@ -872,7 +877,8 @@ class NDArray {
Chunk(const TBlob &data, int dev_id)
: static_data(true), delay_alloc(false),
- storage_ref_(Storage::_GetSharedRef()) {
+ storage_ref_(Storage::_GetSharedRef()),
+ engine_ref_(Engine::_GetSharedRef()) {
CHECK(storage_type == kDefaultStorage);
var = Engine::Get()->NewVariable();
if (data.dev_mask() == cpu::kDevMask) {
@@ -890,7 +896,8 @@ class NDArray {
Chunk(int shared_pid, int shared_id, const mxnet::TShape& shape, int dtype)
: static_data(false), delay_alloc(false),
- storage_ref_(Storage::_GetSharedRef()) {
+ storage_ref_(Storage::_GetSharedRef()),
+ engine_ref_(Engine::_GetSharedRef()) {
var = Engine::Get()->NewVariable();
ctx = Context::CPUShared(0);
shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
@@ -906,7 +913,8 @@ class NDArray {
const mxnet::ShapeVector &aux_shapes_)
: static_data(false), delay_alloc(delay_alloc_),
storage_type(storage_type_),
aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_),
- aux_shapes(aux_shapes_), storage_ref_(Storage::_GetSharedRef()) {
+ aux_shapes(aux_shapes_), storage_ref_(Storage::_GetSharedRef()),
+ engine_ref_(Engine::_GetSharedRef()) {
shandle.ctx = ctx;
var = Engine::Get()->NewVariable();
// aux_handles always reflect the correct number of aux data
@@ -924,7 +932,7 @@ class NDArray {
Chunk(const NDArrayStorageType storage_type_, const TBlob &data,
const std::vector<TBlob> &aux_data, int dev_id)
: static_data(true), delay_alloc(false), storage_type(storage_type_),
- storage_ref_(Storage::_GetSharedRef()) {
+ storage_ref_(Storage::_GetSharedRef()),
engine_ref_(Engine::_GetSharedRef()) {
using namespace mshadow;
CHECK_NE(storage_type, kDefaultStorage);
// init var
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index f5aac36..eddfbcf 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -113,20 +113,22 @@ NDArray::Chunk::~Chunk() {
// We want to delete mkldnn memory after deleting the variable.
mem.mem = this->mkl_mem_;
#endif
- Engine::Get()->DeleteVariable([mem, skip_free](RunContext s) {
- if (skip_free == false) {
+ if (auto engine = engine_ref_.lock()) {
+ engine->DeleteVariable([mem, skip_free](RunContext s) {
+ if (skip_free == false) {
#if MXNET_USE_MKLDNN == 1
- if (mem.mem) {
- CHECK_LE(mem.mem->GetSize(), mem.h.size);
- CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
- }
+ if (mem.mem) {
+ CHECK_LE(mem.mem->GetSize(), mem.h.size);
+ CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
+ }
#endif
- Storage::Get()->Free(mem.h);
- for (const auto& aux : mem.aux_h) {
- Storage::Get()->Free(aux);
+ Storage::Get()->Free(mem.h);
+ for (const auto &aux : mem.aux_h) {
+ Storage::Get()->Free(aux);
+ }
}
- }
- }, shandle.ctx, var);
+ }, shandle.ctx, var);
+ }
}
void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) {
diff --git a/tests/cpp/engine/engine_shutdown_test.cc
b/tests/cpp/engine/engine_shutdown_test.cc
new file mode 100644
index 0000000..893d085
--- /dev/null
+++ b/tests/cpp/engine/engine_shutdown_test.cc
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file engine_shutdown_test.cc
+ * \brief Tests engine shutdown for possible crashes
+*/
+#include <gtest/gtest.h>
+
+#include "../src/engine/engine_impl.h"
+#include "../include/test_util.h"
+
+/**
+ * This test will help ensure we don't crash during engine shutdown.
+ * The crash happens during a static destructor call, so this test may pass
and then cause a test-run process crash.
+ */
+TEST(EngineShutdown, stop_without_crashing) {
+ static std::unique_ptr<mxnet::NDArray> ndArray;
+ {
+ auto engine = mxnet::Engine::_GetSharedRef();
+ ndArray = std::make_unique<mxnet::NDArray>(mxnet::Context::CPU());
+ engine->Stop();
+ }
+}