This is an automated email from the ASF dual-hosted git repository.
jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 19f8e2b Add storage fallback msg to copyfromto (#9219)
19f8e2b is described below
commit 19f8e2bfeaa9cdde7470b8599d604ff39bde05b1
Author: Haibin Lin <[email protected]>
AuthorDate: Tue Jan 2 10:13:32 2018 -0800
Add storage fallback msg to copyfromto (#9219)
* debug env var for infer stype
* add warning for copy from to
* refactor
* remove exception
---
src/common/utils.h | 26 ++++++++++++
src/ndarray/ndarray.cc | 27 +++++++++---
src/operator/operator_common.h | 46 ++++++++++----------
tests/python/unittest/test_executor.py | 77 ++++++++++++++++++----------------
4 files changed, 109 insertions(+), 67 deletions(-)
diff --git a/src/common/utils.h b/src/common/utils.h
index ede218b..6f7e452 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -338,6 +338,32 @@ inline std::string stype_string(const int x) {
return "unknown";
}
+/*! \brief get string representation of device type */
+inline std::string dev_type_string(const int dev_type) {
+ switch (dev_type) {
+ case Context::kCPU:
+ return "cpu";
+ case Context::kGPU:
+ return "gpu";
+ case Context::kCPUPinned:
+ return "cpu_pinned";
+ case Context::kCPUShared:
+ return "cpu_shared";
+ }
+ return "unknown";
+}
+
+/*! \brief log message once. Intended for storage fallback warning messages. */
+inline void LogOnce(const std::string& message) {
+ typedef dmlc::ThreadLocalStore<std::unordered_set<std::string>> LogStore;
+ auto log_store = LogStore::Get();
+ if (log_store->find(message) == log_store->end()) {
+ LOG(INFO) << message;
+ log_store->insert(message);
+ }
+}
+
+
// heuristic to determine number of threads per GPU
inline int GetNumThreadPerGPU() {
// This is resource efficient option.
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index f09f168..212fd7c 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -531,11 +531,28 @@ void CopyFromTo(const NDArray& from, const NDArray& to,
int priority) {
std::vector<Engine::VarHandle> mutable_vars(1, to.var());
std::vector<Resource> requested;
- if (a == gpu::kDevMask && from_stype != to_stype) {
- Resource rsc = ResourceManager::Get()->Request(from_ctx,
- ResourceRequest(ResourceRequest::kTempSpace));
- requested.push_back(rsc);
- mutable_vars.push_back(rsc.var);
+ if (from_stype != to_stype) {
+ using namespace common;
+ static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
+ if (log) {
+ std::ostringstream os;
+      os << "\nStorage fallback detected:\n"
+         << "Copy from " << stype_string(from_stype) << " storage type on " << dev_type_string(a)
+         << " to " << stype_string(to_stype) << " storage type on " << dev_type_string(b)
+         << ".\nA temporary ndarray with " << stype_string(to_stype)
+         << " storage type will be generated in order to perform the copy. "
+         << "You can set environment variable "
+         << "MXNET_STORAGE_FALLBACK_LOG_VERBOSE to 0 to suppress this warning.";
+ LogOnce(os.str());
+ }
+
+ // request temp resource if cast_storage performs on GPU
+ if (a == gpu::kDevMask) {
+ Resource rsc = ResourceManager::Get()->Request(from_ctx,
+ ResourceRequest(ResourceRequest::kTempSpace));
+ requested.push_back(rsc);
+ mutable_vars.push_back(rsc.var);
+ }
}
if (a == cpu::kDevMask && b == cpu::kDevMask) {
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index 560d11e..329db02 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -223,7 +223,7 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) {
{ \
if (!shape_assign(&(shape_array)[index], TShape(shape))) { \
std::ostringstream os; \
- os << "Shape inconsistent, Provided=" << (shape_array)[index] << ',' \
+ os << "Shape inconsistent, Provided = " << (shape_array)[index] << ','\
<< " inferred shape=" << shape; \
throw ::mxnet::op::InferShapeError(os.str(), index); \
} \
@@ -240,9 +240,9 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) {
{ \
if (!type_assign(&(type_array)[index], type)) { \
std::ostringstream os; \
- os << "Type inconsistent, Provided=" \
+ os << "Type inconsistent, Provided = " \
<< type_string((type_array)[index]) << ',' \
- << " inferred type=" << type_string(type); \
+ << " inferred type = " << type_string(type); \
throw ::mxnet::op::InferTypeError(os.str(), index); \
} \
}
@@ -258,9 +258,9 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) {
{ \
if (!type_assign(&(type_array)[index], type)) { \
std::ostringstream os; \
- os << "Storage type inconsistent, Provided=" \
+ os << "Storage type inconsistent, Provided = " \
<< common::stype_string((type_array)[index]) << ',' \
- << " inferred storage type=" << common::stype_string(type); \
+ << " inferred storage type = " << common::stype_string(type); \
throw ::mxnet::op::InferStorageTypeError(os.str(), index); \
} \
}
@@ -274,11 +274,11 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) {
*/
#define DISPATCH_MODE_ASSIGN_CHECK(type_array, index, type) \
{ \
-    if (!dispatch_mode_assign(&(type_array)[index], type)) {                  \
+ if (!dispatch_mode_assign(&(type_array)[index], type)) { \
std::ostringstream os; \
- os << "Dispatch mode inconsistent, Provided=" \
+ os << "Dispatch mode inconsistent, Provided = " \
<< common::dispatch_mode_string((type_array)[index]) << ',' \
- << " inferred mode=" << common::dispatch_mode_string(type); \
+ << " inferred mode = " << common::dispatch_mode_string(type); \
throw ::mxnet::op::InferStorageTypeError(os.str(), index); \
} \
}
@@ -501,7 +501,7 @@ inline std::string operator_stype_string(const nnvm::NodeAttrs& attrs,
result += "\"" + kv.first + "\" : " + kv.second + ", ";
}
result += "}\n";
- result += "context.dev_mask = " + std::to_string(dev_mask);
+ result += "context.dev_mask = " + common::dev_type_string(dev_mask);
return result;
}
@@ -527,22 +527,18 @@ inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
const int dev_mask,
const std::vector<int>* in_attrs,
const std::vector<int>* out_attrs) {
-  using namespace op;
-  auto warning_printed = dmlc::ThreadLocalStore<std::unordered_set<std::string>>::Get();
-  static bool log_verbose = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
-  if (log_verbose) {
-    std::string warning = operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
-    if (warning_printed->find(warning) == warning_printed->end()) {
-      LOG(INFO) << "\nStorage fallback detected:\n" << warning
-                << "\nThe operator with default storage type will be dispatched for execution. "
-                << "You're seeing this warning message because the operator above is unable to "
-                << "process the given ndarrays with specified storage types and parameter. "
-                << "Temporary dense ndarrays are generated in order to execute the operator. "
-                << "You can set environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE "
-                << "to 0 to suppress the warnings.";
-      warning_printed->insert(warning);
-    }
-  }
+  static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
+  if (!log) return;
+  const std::string op_str = op::operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
+  std::ostringstream os;
+  os << "\nStorage type fallback detected:\n" << op_str
+     << "\nThe operator with default storage type will be dispatched for execution. "
+     << "You're seeing this warning message because the operator above is unable to "
+     << "process the given ndarrays with specified storage types, context and parameter. "
+     << "Temporary dense ndarrays are generated in order to execute the operator. "
+     << "You can set environment variable "
+     << "MXNET_STORAGE_FALLBACK_LOG_VERBOSE to 0 to suppress this warning.";
+  common::LogOnce(os.str());
}
} // namespace op
diff --git a/tests/python/unittest/test_executor.py b/tests/python/unittest/test_executor.py
index e3d977d..40a922c 100644
--- a/tests/python/unittest/test_executor.py
+++ b/tests/python/unittest/test_executor.py
@@ -77,40 +77,44 @@ def check_bind_with_uniform(uf, gf, dim, sf=None, lshape=None, rshape=None):
assert reldiff(rhs_grad.asnumpy(), rhs_grad2) < 1e-6
-def test_bind(disable_bulk_exec=False):
-    if disable_bulk_exec:
-        prev_bulk_inf_val = mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_INFERENCE", "0", "1")
-        prev_bulk_train_val = mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_TRAIN", "0", "1")
-
- np.random.seed(0)
- nrepeat = 10
- maxdim = 4
- for repeat in range(nrepeat):
- for dim in range(1, maxdim):
- check_bind_with_uniform(lambda x, y: x + y,
- lambda g, x, y: (g, g),
- dim)
- check_bind_with_uniform(lambda x, y: x - y,
- lambda g, x, y: (g, -g),
- dim)
- check_bind_with_uniform(lambda x, y: x * y,
- lambda g, x, y: (y * g, x * g),
- dim)
- check_bind_with_uniform(lambda x, y: x / y,
- lambda g, x, y: (g / y, -x * g/ (y**2)),
- dim)
-
- check_bind_with_uniform(lambda x, y: np.maximum(x, y),
- lambda g, x, y: (g * (x>y), g * (y>x)),
- dim,
- sf=mx.symbol.maximum)
- check_bind_with_uniform(lambda x, y: np.minimum(x, y),
- lambda g, x, y: (g * (x<y), g * (y<x)),
- dim,
- sf=mx.symbol.minimum)
-    if disable_bulk_exec:
-        mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_INFERENCE", prev_bulk_inf_val)
-        mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_TRAIN", prev_bulk_train_val)
+def test_bind():
+    def check_bind(disable_bulk_exec):
+        if disable_bulk_exec:
+            prev_bulk_inf_val = mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_INFERENCE", "0", "1")
+            prev_bulk_train_val = mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_TRAIN", "0", "1")
+
+ np.random.seed(0)
+ nrepeat = 10
+ maxdim = 4
+ for repeat in range(nrepeat):
+ for dim in range(1, maxdim):
+ check_bind_with_uniform(lambda x, y: x + y,
+ lambda g, x, y: (g, g),
+ dim)
+ check_bind_with_uniform(lambda x, y: x - y,
+ lambda g, x, y: (g, -g),
+ dim)
+ check_bind_with_uniform(lambda x, y: x * y,
+ lambda g, x, y: (y * g, x * g),
+ dim)
+                check_bind_with_uniform(lambda x, y: x / y,
+                                        lambda g, x, y: (g / y, -x * g/ (y**2)),
+                                        dim)
+
+ check_bind_with_uniform(lambda x, y: np.maximum(x, y),
+ lambda g, x, y: (g * (x>y), g * (y>x)),
+ dim,
+ sf=mx.symbol.maximum)
+ check_bind_with_uniform(lambda x, y: np.minimum(x, y),
+ lambda g, x, y: (g * (x<y), g * (y<x)),
+ dim,
+ sf=mx.symbol.minimum)
+        if disable_bulk_exec:
+            mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_INFERENCE", prev_bulk_inf_val)
+            mx.test_utils.set_env_var("MXNET_EXEC_BULK_EXEC_TRAIN", prev_bulk_train_val)
+
+ check_bind(True)
+ check_bind(False)
def test_dot():
np.random.seed(0)
@@ -154,6 +158,5 @@ def test_reshape():
assert np.all(exe.outputs[0].asnumpy() == 4)
if __name__ == "__main__":
- test_bind(disable_bulk_exec=False)
- test_bind(disable_bulk_exec=True)
- test_reshape()
+ import nose
+ nose.runmodule()
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].