This is an automated email from the ASF dual-hosted git repository. marcoabreu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push: new f9c2689 Fix a race condition in converting data layouts in MKLDNN. (#9862) f9c2689 is described below commit f9c2689ec2ffd61ce123dce5857f8a797f21e4df Author: Da Zheng <zhengda1...@gmail.com> AuthorDate: Thu Mar 1 11:54:35 2018 +0100 Fix a race condition in converting data layouts in MKLDNN. (#9862) * Fix a race condition in converting data layouts. * Avoid calling data() in elemwise sum. * Fix a compilation error. * Address comments. * avoid data layout conversion inside ndarray. * Fix a compilation error. * address comments. * Reorder weight arrays in convolution async. * Fix async data reordering in NDArray. * Fix race condition in deconv. * Update ndarray.cc * Check more in NDArray. * Fix a bug in MKLDNNDataReorder. * Fix a bug in NDArray. * Simplify weight reorder in (de-)conv. --- include/mxnet/ndarray.h | 23 +++- src/ndarray/ndarray.cc | 149 +++++++++++++++-------- src/operator/nn/mkldnn/mkldnn_base.cc | 17 +++ src/operator/nn/mkldnn/mkldnn_convolution.cc | 25 ++-- src/operator/nn/mkldnn/mkldnn_deconvolution.cc | 22 ++-- src/operator/nn/mkldnn/mkldnn_fully_connected.cc | 5 + src/operator/tensor/cast_storage-inl.h | 7 +- src/operator/tensor/elemwise_sum.cc | 15 +-- tests/python/gpu/test_gluon_model_zoo_gpu.py | 10 +- 9 files changed, 188 insertions(+), 85 deletions(-) diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h index 7ce41ab..67d2a27 100644 --- a/include/mxnet/ndarray.h +++ b/include/mxnet/ndarray.h @@ -622,12 +622,29 @@ class NDArray { /* * Reorder the memory to the specified layout. */ - void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc); + void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc) { + CHECK_EQ(storage_type(), kDefaultStorage); + ptr_->MKLDNNDataReorder(desc); + } void Reorder2Default() { CHECK_EQ(storage_type(), kDefaultStorage); ptr_->Reorder2Default(); } + /* + * These are the async version of the methods above. + * It changes the layout of this NDArray, but it happens after all accesses to + * the array are complete. + */ + void Reorder2DefaultAsync(); + void MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc); + + /* + * This creates a new NDArray with the reordered data. + * It doesn't affect the data of the original NDArray. + */ + NDArray Reorder2Default() const; + void InvalidateMKLDNNData() { // Removing mkl_mem_ means the NDArray will store data in the default format. ptr_->mkl_mem_ = nullptr; @@ -880,9 +897,11 @@ class NDArray { // Have MKL memory reference to the data in the default storage // or create memory for MKLDNN. void SetMKLMem(const TShape &shape, int dtype); - // In the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and + // If the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and // save the result in shandle. void Reorder2Default(); + // Reroder data to a specified layout. + void MKLDNNDataReorder(const mkldnn::memory::primitive_desc &desc); bool IsMKLDNN() const; bool IsDefault() const; #endif diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index ae7209e..84328ea 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -375,7 +375,45 @@ void NDArray::Chunk::Reorder2Default() { CheckAndAlloc(def_pd.get_size()); // TODO(zhengda) We need to avoid memory copy here. memcpy(shandle.dptr, def_mem->get_data_handle(), def_pd.get_size()); - mkl_mem_.reset(new mkldnn::memory(def_pd, shandle.dptr)); + mkl_mem_ = nullptr; +} + +void NDArray::Chunk::MKLDNNDataReorder(const mkldnn::memory::primitive_desc &pd) { + // If the memory already uses the specified layout, don't do anything. + if (mkl_mem_ != nullptr && mkl_mem_->get_primitive_desc() == pd) + return; + auto _pd = pd; + auto _desc = _pd.desc(); + auto def_format = GetDefaultFormat(_desc); + // If the memory is default, don't do anything. + if (def_format == _desc.data.format && IsDefault()) + return; + // If the specified layout is default, we should use Reorder2Default. + if (def_format == _desc.data.format) { + Reorder2Default(); + return; + } + + std::shared_ptr<mkldnn::memory> new_mem(new mkldnn::memory(pd)); + std::shared_ptr<mkldnn::memory> old_mem; + if (IsDefault()) { + auto def_pd = GetPrimitiveDesc(pd, def_format); + old_mem.reset(new mkldnn::memory(def_pd, shandle.dptr)); + } else { + old_mem = this->mkl_mem_; + } + CHECK(old_mem->get_primitive_desc().desc().data.ndims == _desc.data.ndims); + + // This may be called in MKLDNN operators. We can't use MKLDNNStream here. + std::vector<mkldnn::primitive> net; + net.push_back(mkldnn::reorder(*old_mem, *new_mem)); + mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); + + CHECK(shandle.size >= pd.get_size()); + CheckAndAlloc(pd.get_size()); + // TODO(zhengda) We need to avoid memory copy here. + memcpy(shandle.dptr, new_mem->get_data_handle(), pd.get_size()); + mkl_mem_.reset(new mkldnn::memory(pd, shandle.dptr)); } void NDArray::Chunk::SetMKLMem(const TShape &shape, int dtype) { @@ -495,12 +533,56 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder( } } +NDArray NDArray::Reorder2Default() const { + CHECK(storage_type() == kDefaultStorage); + + if (ptr_->mkl_mem_ == nullptr) + return *this; + auto format = GetDefaultFormat(ptr_->mkl_mem_->get_primitive_desc().desc()); + if (format == ptr_->mkl_mem_->get_primitive_desc().desc().data.format) + return *this; + + NDArray ret(shape(), ctx(), false, dtype()); + auto def_pd = GetPrimitiveDesc(ptr_->mkl_mem_->get_primitive_desc(), format); + CHECK(ret.ptr_->shandle.size >= def_pd.get_size()); + mkldnn::memory def_mem(def_pd, ret.ptr_->shandle.dptr); + // This may be called in MKLDNN operators. We can't use MKLDNNStream here. + std::vector<mkldnn::primitive> net; + net.push_back(mkldnn::reorder(*ptr_->mkl_mem_, def_mem)); + mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); + return ret; +} + +void NDArray::Reorder2DefaultAsync() { + std::vector<Engine::VarHandle> const_vars; + std::vector<Engine::VarHandle> mutable_vars(1, this->var()); + NDArray tmp = *this; + Engine::Get()->PushAsync( + [tmp](RunContext ctx, Engine::CallbackOnComplete on_complete) { + tmp.ptr_->Reorder2Default(); + on_complete(); + }, ctx(), const_vars, mutable_vars, + FnProperty::kNormal, 0, PROFILER_MESSAGE("Reorder2Default")); +} + +void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc) { + std::vector<Engine::VarHandle> const_vars; + std::vector<Engine::VarHandle> mutable_vars(1, this->var()); + NDArray tmp = *this; + Engine::Get()->PushAsync( + [tmp, desc](RunContext ctx, Engine::CallbackOnComplete on_complete) { + tmp.ptr_->MKLDNNDataReorder(desc); + on_complete(); + }, ctx(), const_vars, mutable_vars, + FnProperty::kNormal, 0, PROFILER_MESSAGE("Reorder")); +} + const mkldnn::memory *NDArray::GetMKLDNNData() const { CHECK(storage_type() == kDefaultStorage); - // If this array uses MKLDNN layout and it's a view, we have to change its - // layout to the default layout. - if (IsMKLDNNData() && IsView()) - ptr_->Reorder2Default(); + // If this array uses MKLDNN layout, we have to make sure it's not a view. + // Otherwise, we'll have to change the layout inside the array. + if (IsMKLDNNData()) + CHECK(!IsView()); ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_); // If shandle has data, the data in shandle and mkl_mem_ should match. if (ptr_->shandle.dptr) @@ -534,45 +616,6 @@ const mkldnn::memory *NDArray::GetMKLDNNData() const { } } -void NDArray::MKLDNNDataReorder(const mkldnn::memory::primitive_desc &pd) { - CHECK_EQ(storage_type(), kDefaultStorage); - // If the memory already uses the specified layout, don't do anything. - if (ptr_->mkl_mem_ != nullptr && ptr_->mkl_mem_->get_primitive_desc() == pd) - return; - auto _pd = pd; - auto _desc = _pd.desc(); - auto def_format = GetDefaultFormat(_desc); - // If the memory is default, don't do anything. - if (def_format == _desc.data.format && ptr_->IsDefault()) - return; - // If the specified layout is default, we should use Reorder2Default. - if (def_format == _desc.data.format) { - ptr_->Reorder2Default(); - return; - } - - std::shared_ptr<mkldnn::memory> new_mem(new mkldnn::memory(pd)); - ptr_->SetMKLMem(shape_, dtype_); - auto old_mem = ptr_->mkl_mem_; - // It's possible that the specified layout has a different number of dimensions. - if (old_mem->get_primitive_desc().desc().data.ndims != _desc.data.ndims) { - // For now, we only support reorder from the default layout. - CHECK(ptr_->IsDefault()); - auto def_pd = GetPrimitiveDesc(pd, def_format); - old_mem.reset(new mkldnn::memory(def_pd, old_mem->get_data_handle())); - } - // This may be called in MKLDNN operators. We can't use MKLDNNStream here. - std::vector<mkldnn::primitive> net; - net.push_back(mkldnn::reorder(*old_mem, *new_mem)); - mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); - - CHECK(ptr_->shandle.size >= pd.get_size()); - ptr_->CheckAndAlloc(pd.get_size()); - // TODO(zhengda) We need to avoid memory copy here. - memcpy(ptr_->shandle.dptr, new_mem->get_data_handle(), pd.get_size()); - ptr_->mkl_mem_.reset(new mkldnn::memory(pd, ptr_->shandle.dptr)); -} - void NDArray::CopyFrom(const mkldnn::memory &mem) { CHECK(ptr_ != nullptr) << "The NDArray hasn't been initialized"; if (ptr_->mkl_mem_.get() == &mem) @@ -581,10 +624,10 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) { CHECK(mem.get_primitive_desc().get_size() == shape().Size() * GetTypeSize(dtype_)) << "The size of NDArray doesn't match the requested MKLDNN memory desc"; MKLDNNStream *stream = MKLDNNStream::Get(); - // If this array uses MKLDNN layout and it's a view, we have to change its - // layout to the default layout. - if (IsMKLDNNData() && IsView()) - ptr_->Reorder2Default(); + // If this array uses MKLDNN layout, we have to make sure it's not a view. + // Otherwise, we'll have to change the layout inside the array. + if (IsMKLDNNData()) + CHECK(!IsView()); ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_); stream->RegisterMem(ptr_->mkl_mem_); @@ -1017,6 +1060,7 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext // with Copy(). NDArray tmp_from = from; if (tmp_from.IsMKLDNNData()) { + // TODO(zhengda) tmp_from should be cached. tmp_from = NDArray(from.shape(), from.ctx(), false, from.dtype()); auto tmp_mem = from.GetMKLDNNData(); tmp_from.CopyFrom(*tmp_mem); @@ -1025,7 +1069,7 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext CHECK(tmp_from.IsDefaultData()); CHECK(to.IsDefaultData()); TBlob tmp = to.data(); - ndarray::Copy<from_xpu, to_xpu>(from.data(), &tmp, + ndarray::Copy<from_xpu, to_xpu>(tmp_from.data(), &tmp, from.ctx(), to.ctx(), ctx); } #endif @@ -1849,7 +1893,12 @@ void NDArray::SyncCopyToCPU(void *data, size_t size) const { if (this->ctx().dev_mask() == cpu::kDevMask) { this->WaitToRead(); RunContext rctx{this->ctx(), nullptr}; - ndarray::Copy<cpu, cpu>(this->data(), &dst, + NDArray src = *this; +#if MXNET_USE_MKLDNN == 1 + if (src.IsMKLDNNData()) + src = this->Reorder2Default(); +#endif + ndarray::Copy<cpu, cpu>(src.data(), &dst, Context::CPU(), Context::CPU(), rctx); } else { #if MXNET_USE_CUDA diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc index f21111b..edc3482 100644 --- a/src/operator/nn/mkldnn/mkldnn_base.cc +++ b/src/operator/nn/mkldnn/mkldnn_base.cc @@ -270,9 +270,26 @@ void FallBackCompute(FCompute fn, const nnvm::NodeAttrs &attrs, const std::vector<OpReqType> &req, const std::vector<NDArray> &outputs) { std::vector<TBlob> in_blobs(inputs.size()); + std::vector<NDArray> in_bufs; for (size_t i = 0; i < in_blobs.size(); i++) { + // If the input data isn't stored in the default format, we shouldn't + // call data() directly, which will change the layout of the NDArray. + // Instead, we should save the converted data in another NDArray. + // TODO(zhengda) we should use temp space to save the converted data. + if (inputs[i].IsDefaultData()) { in_blobs[i] = inputs[i].data(); + } else { + if (in_bufs.empty()) + in_bufs.reserve(inputs.size()); + in_bufs.emplace_back(inputs[i].shape(), inputs[i].ctx(), + false, inputs[i].dtype()); + const mkldnn::memory *mem = inputs[i].GetMKLDNNData(); + in_bufs.back().CopyFrom(*mem); + in_blobs[i] = in_bufs.back().data(); + } } + MKLDNNStream::Get()->Submit(); + std::vector<TBlob> out_blobs(outputs.size()); for (size_t i = 0; i < out_blobs.size(); i++) { if (req[i] == kWriteTo) diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc index b94850a..76efc24 100644 --- a/src/operator/nn/mkldnn/mkldnn_convolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc @@ -262,8 +262,8 @@ void MKLDNNConvolutionForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx const std::vector<NDArray> &out_data) { TmpMemMgr::Get()->Init(ctx.requested[conv::kTempSpace]); const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed); - MKLDNNConvForward &fwd = GetConvFwd(attrs, - ctx.is_train, in_data[conv::kData], in_data[conv::kWeight], + NDArray weight = in_data[conv::kWeight]; + MKLDNNConvForward &fwd = GetConvFwd(attrs, ctx.is_train, in_data[conv::kData], weight, param.no_bias ? nullptr : &in_data[conv::kBias], out_data[conv::kOut]); auto data_mem = in_data[conv::kData].GetMKLDNNDataReorder(fwd.fwd_pd.src_primitive_desc()); @@ -271,16 +271,23 @@ void MKLDNNConvolutionForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx if (ctx.is_train) { // TODO(zhengda) kvstore doesn't handle MKLDNN correctly. Let's reorder it // to the default format for now. - if (in_data[conv::kWeight].IsMKLDNNData()) - const_cast<NDArray &>(in_data[conv::kWeight]).Reorder2Default(); - weight_mem = GetWeights(in_data[conv::kWeight], fwd.fwd_pd.weights_primitive_desc(), - param.num_group); + if (weight.IsMKLDNNData()) + // This asks the engine to change the layout of the weight array after + // it's used. + weight.Reorder2DefaultAsync(); + weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group); } else { // For inference, we want to reorder the weight array so we don't need to // reorder data every time. - const_cast<NDArray &>(in_data[conv::kWeight]).MKLDNNDataReorder( - fwd.fwd_pd.weights_primitive_desc()); - weight_mem = in_data[conv::kWeight].GetMKLDNNData(); + if (weight.IsDefaultData()) { + weight_mem = GetWeights(weight, fwd.fwd_pd.weights_primitive_desc(), param.num_group); + // We also need to modify the layout on the original weight array. The + // data conversion happens after the weight array is used. + weight.MKLDNNDataReorderAsync(fwd.fwd_pd.weights_primitive_desc()); + } else { + weight_mem = weight.GetMKLDNNData(); + CHECK(weight_mem->get_primitive_desc() == fwd.fwd_pd.weights_primitive_desc()); + } } auto out_mem = CreateMKLDNNMem(out_data[conv::kOut], fwd.fwd_pd.dst_primitive_desc(), req[conv::kOut]); diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc index d336d6d..a0d3df7 100644 --- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc @@ -234,21 +234,27 @@ void MKLDNNDeconvForward::SetDataHandle(const DeconvolutionParam& param, const std::vector<NDArray> &out_data) { auto data_mem = in_data[deconv::kData].GetMKLDNNDataReorder( fwd_pd.diff_dst_primitive_desc()); + NDArray weight = in_data[deconv::kWeight]; const mkldnn::memory *weight_mem; if (ctx.is_train) { // TODO(zhengda) kvstore doesn't handle MKLDNN correctly. Let's reorder it // to the default format for now. - if (in_data[deconv::kWeight].IsMKLDNNData()) - const_cast<NDArray &>(in_data[deconv::kWeight]).Reorder2Default(); - weight_mem = GetWeights(in_data[deconv::kWeight], - fwd_pd.weights_primitive_desc(), - param.num_group); + if (weight.IsMKLDNNData()) + // This asks the engine to reorder data after the weight array is used. + weight.Reorder2DefaultAsync(); + weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group); } else { // For inference, we want to reorder the weight array so we don't need to // reorder data every time. - const_cast<NDArray &>(in_data[deconv::kWeight]).MKLDNNDataReorder( - fwd_pd.weights_primitive_desc()); - weight_mem = in_data[deconv::kWeight].GetMKLDNNData(); + if (weight.IsDefaultData()) { + weight_mem = GetWeights(weight, fwd_pd.weights_primitive_desc(), param.num_group); + // We also need to modify the layout on the original weight array. The + // data conversion happens after the weight array is used. + weight.MKLDNNDataReorderAsync(fwd_pd.weights_primitive_desc()); + } else { + weight_mem = weight.GetMKLDNNData(); + CHECK(weight_mem->get_primitive_desc() == fwd_pd.weights_primitive_desc()); + } } auto out_mem = CreateMKLDNNMem(out_data[deconv::kOut], fwd_pd.diff_src_primitive_desc(), req[deconv::kOut]); diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc index a8b85bb..eb379f2 100644 --- a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc +++ b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc @@ -90,6 +90,11 @@ void MKLDNNFCForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const TShape& oshape = out_data[fullc::kOut].shape(); NDArray weight = in_data[fullc::kWeight]; NDArray data = in_data[fullc::kData]; + // If the input data is a view of an MKLDNN array, we should create a new + // NDArray with reordered data. + if (data.IsMKLDNNData() && data.IsView()) + data = in_data[fullc::kData].Reorder2Default(); + auto out_md = GetMemDesc(out_data[fullc::kOut]); if (data.shape().ndim() != 2 && !param.flatten) { data = data.MKLDNNDataReshape(Shape2(ishape.ProdShape(0, ishape.ndim()-1), diff --git a/src/operator/tensor/cast_storage-inl.h b/src/operator/tensor/cast_storage-inl.h index e345bb2..46de10a 100644 --- a/src/operator/tensor/cast_storage-inl.h +++ b/src/operator/tensor/cast_storage-inl.h @@ -351,7 +351,12 @@ void CastStorageComputeImpl(const OpContext& ctx, CHECK_EQ(output.ctx().dev_type, input.ctx().dev_type); // If one of them uses the MKLDNN layout. if (input.IsMKLDNNData() || output.IsMKLDNNData()) { - auto in_mem = input.GetMKLDNNData(); + NDArray tmp_input = input; + // If the input data is MKLDNN and is a view, we need to reorder the input + // data first. + if (input.IsMKLDNNData() && input.IsView()) + tmp_input = input.Reorder2Default(); + const mkldnn::memory *in_mem = tmp_input.GetMKLDNNData(); const_cast<NDArray &>(output).CopyFrom(*in_mem); MKLDNNStream::Get()->Submit(); } else { diff --git a/src/operator/tensor/elemwise_sum.cc b/src/operator/tensor/elemwise_sum.cc index 10154bc..8efeb85 100644 --- a/src/operator/tensor/elemwise_sum.cc +++ b/src/operator/tensor/elemwise_sum.cc @@ -25,6 +25,7 @@ #include "./elemwise_sum.h" #include "../../ndarray/ndarray_function.h" #include "../nn/mkldnn/mkldnn_ops-inl.h" +#include "../nn/mkldnn/mkldnn_base-inl.h" #include "../../common/utils.h" namespace mxnet { @@ -122,19 +123,9 @@ void ElementWiseSumComputeExCPU(const nnvm::NodeAttrs& attrs, #if MXNET_USE_MKLDNN == 1 } else if (IsMKLDNNData(inputs)) { MKLDNNSumForward(attrs, ctx, inputs, req[0], outputs[0]); -#endif } else if (common::ContainsOnlyStorage(inputs, kDefaultStorage)) { - // This case happens when we want to create an MKLDNN NDArray but the type - // or the shape isn't supported by MKLDNN. In this case, NDArray falls back - // to the default storage type and, thus, we have to handle the default - // storage in FComputeEx. - std::vector<TBlob> in_blobs(inputs.size()); - std::vector<TBlob> out_blobs(outputs.size()); - for (size_t i = 0; i < in_blobs.size(); i++) - in_blobs[i] = inputs[i].data(); - for (size_t i = 0; i < out_blobs.size(); i++) - out_blobs[i] = outputs[i].data(); - ElementWiseSumCompute<cpu>(attrs, ctx, in_blobs, req, out_blobs); + FallBackCompute(ElementWiseSumCompute<cpu>, attrs, ctx, inputs, req, outputs); +#endif } else { LogUnimplementedOp(attrs, ctx, inputs, req, outputs); } diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py b/tests/python/gpu/test_gluon_model_zoo_gpu.py index 6456436..378a822 100644 --- a/tests/python/gpu/test_gluon_model_zoo_gpu.py +++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py @@ -37,7 +37,6 @@ def download_data(): return mx.test_utils.download( 'http://data.mxnet.io/data/val-5k-256.rec', VAL_DATA) -@unittest.skip("test fails intermittently. temporarily disabled.") @with_seed() def test_inference(): all_models = ['resnet50_v1', 'vgg19_bn', 'alexnet', #'inceptionv3', @@ -87,7 +86,9 @@ def test_inference(): cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu())) gpu_out = gpu_model(gpu_data) out = cpu_out.asnumpy() - max_val = np.max(out) + max_val = np.max(np.abs(out)) + gpu_max_val = np.max(np.abs(gpu_out.asnumpy())) + eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val)) assert_almost_equal(out / max_val, gpu_out.asnumpy() / max_val, rtol=1e-3, atol=1e-3) def get_nn_model(name): @@ -156,7 +157,10 @@ def test_training(): gpu_out = gpu_model(gpu_data) cpu_loss = softmax_cross_entropy(cpu_out, label) gpu_loss = softmax_cross_entropy(gpu_out, gpu_label) - assert_almost_equal(cpu_out.asnumpy(), gpu_out.asnumpy(), rtol=1e-2, atol=1e-2) + max_val = np.max(np.abs(cpu_out.asnumpy())) + gpu_max_val = np.max(np.abs(gpu_out.asnumpy())) + eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val)) + assert_almost_equal(cpu_out.asnumpy() / max_val, gpu_out.asnumpy() / max_val, rtol=1e-3, atol=1e-3) cpu_loss.backward() gpu_loss.backward() cpu_trainer.step(batch_size) -- To stop receiving notification emails like this one, please contact marcoab...@apache.org.