bartekkuncer commented on a change in pull request #20567: URL: https://github.com/apache/incubator-mxnet/pull/20567#discussion_r718501481
########## File path: src/operator/nn/mkldnn/mkldnn_softmax-inl.h ##########
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_softmax-inl.h
+ * Naming convention:
+ *                  ________
+ *                 |Softmax|
+ *  data --------->|  FWD  |---> out
+ *                 |_______|
+ *                  ________
+ *                 |Softmax|<--- out
+ *  data_grad <----|  BWD  |
+ *                 |_______|<--- out_grad

Review comment:
   It would be nice to make these drawings symmetric.
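For context on the drawing: the BWD block reads `out` and `out_grad` rather than the original `data` because the softmax gradient can be expressed entirely in terms of the forward output. A minimal plain-C++ sketch of that reference formula (not the oneDNN code path; names are illustrative):

```cpp
#include <cstdio>
#include <vector>

// Reference softmax backward: data_grad depends only on out (y) and
// out_grad (dy), which is why the BWD box in the drawing consumes
// out and out_grad instead of data:
//   dx_i = y_i * (dy_i - sum_j(dy_j * y_j))
std::vector<float> SoftmaxBackward(const std::vector<float>& out,
                                   const std::vector<float>& out_grad) {
  float dot = 0.0f;
  for (size_t j = 0; j < out.size(); ++j)
    dot += out_grad[j] * out[j];
  std::vector<float> data_grad(out.size());
  for (size_t i = 0; i < out.size(); ++i)
    data_grad[i] = out[i] * (out_grad[i] - dot);
  return data_grad;
}

int main() {
  const std::vector<float> out      = {0.1f, 0.2f, 0.7f};  // softmax output y
  const std::vector<float> out_grad = {1.0f, 0.0f, 0.0f};  // dL/dy
  for (float g : SoftmaxBackward(out, out_grad))
    std::printf("%f\n", g);
}
```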
########## File path: src/operator/nn/mkldnn/mkldnn_softmax.cc ##########
@@ -23,189 +23,211 @@
  * \author Da Zheng
  */

-#include "./mkldnn_base-inl.h"
-#include "./mkldnn_ops-inl.h"
+#if MXNET_USE_ONEDNN == 1

-#include "../softmax-inl.h"
+#include "./mkldnn_softmax-inl.h"

-#if MXNET_USE_ONEDNN == 1
 namespace mxnet {
 namespace op {

-static mkldnn::softmax_forward::primitive_desc GetSoftmaxFwdPd(bool is_train,
-                                                               const int axis,
-                                                               const mkldnn::memory& input_mem) {
-  mkldnn::memory::desc data_md = input_mem.get_desc();
-  auto cpu_engine = CpuEngine::Get()->get_engine();
-  auto prop = is_train ? mkldnn::prop_kind::forward_training : mkldnn::prop_kind::forward_scoring;
-  auto desc = mkldnn::softmax_forward::desc(prop, data_md, axis);
-  return mkldnn::softmax_forward::primitive_desc(desc, cpu_engine);
-}
-
-static mkldnn::softmax_backward::primitive_desc GetSoftmaxBwdPd(
-    const mkldnn::memory& diff_mem,
-    const mkldnn::memory& data_mem,
-    const int axis,
-    const mkldnn::softmax_forward::primitive_desc& hint_fwd_pd) {
-  mkldnn::memory::desc diff_md = diff_mem.get_desc();
-  mkldnn::memory::desc data_md = data_mem.get_desc();
-  auto cpu_engine = CpuEngine::Get()->get_engine();
-  auto desc = mkldnn::softmax_backward::desc(diff_md, data_md, axis);
-  return mkldnn::softmax_backward::primitive_desc(desc, cpu_engine, hint_fwd_pd);
-}
-
 bool SupportMKLDNNSoftmax(const SoftmaxParam& param, const NDArray& data, const NDArray& output) {
-  // MKLDNN does not support temperature argument in their softmax function
-  // now. Need update this once they start to support it.
   const int ndim      = data.shape().ndim();
   const int in_dtype  = data.dtype();
   const int out_dtype = output.dtype();
   const int axis      = CheckAxis(param.axis, ndim);
-  // MKLDNN does not support temperature argument in their softmax function
-  // now. Need update this once they start to support it.
+
+  if (param.temperature.has_value() && param.temperature.value() == 0.0) {
+    return false;
+  }
+
   // Currently, MKLDNN shows bad performance when softmax is not performed on the last dimension
-  if (param.temperature.has_value() || in_dtype != mshadow::kFloat32 || in_dtype != out_dtype ||
-      axis != (ndim - 1)) {
+  if (in_dtype != mshadow::kFloat32 || in_dtype != out_dtype || axis != (ndim - 1)) {
     return false;
   }

-  // only supports ndim = 1, 2, 3, 4 for now
-  return (ndim >= 1 && ndim <= 4);
+  // only supports up to 6 ndim
+  return (ndim >= 1 && ndim <= 6);
 }

-class MKLDNNSoftmaxFwd {
- public:
-  mkldnn::softmax_forward::primitive_desc pd;
-
-  MKLDNNSoftmaxFwd(const bool is_train, const int axis, const mkldnn::memory& input)
-      : pd(GetSoftmaxFwdPd(is_train, axis, input)) {
-    fwd_ = std::make_shared<mkldnn::softmax_forward>(pd);
-  }
+void MKLDNNSoftmaxForward(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const NDArray& in_data,
+                          const OpReqType& req,
+                          const NDArray& out_data) {
+  if (req == kNullOp)
+    return;
+  // same as the FCompute path, softmax only supports kWriteTo and kWriteInplace for now.

Review comment:
   This comment is inconsistent with the other ones and, to be honest, looks a bit odd with a lowercase letter at the beginning and a dot at the end.
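As background for the new `temperature == 0.0` guard in this hunk: softmax with temperature is softmax(x / T), so T = 0 would make the 1 / T scale used later undefined. A minimal reference sketch of the semantics, assuming plain C++ rather than the MKLDNN primitives:

```cpp
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

// Reference softmax with temperature: softmax(x / T).
// T == 0 would mean scaling by 1/T = inf, hence the early `return false`
// in SupportMKLDNNSoftmax for that case.
std::vector<float> SoftmaxWithTemperature(const std::vector<float>& x, float temperature) {
  std::vector<float> y(x.size());
  // Subtract the max of the scaled logits for numerical stability.
  float max_val = -std::numeric_limits<float>::infinity();
  for (float v : x)
    max_val = std::max(max_val, v / temperature);
  float sum = 0.0f;
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = std::exp(x[i] / temperature - max_val);
    sum += y[i];
  }
  for (float& v : y)
    v /= sum;
  return y;
}

int main() {
  for (float y : SoftmaxWithTemperature({1.0f, 2.0f, 3.0f}, 0.5f))
    std::printf("%f\n", y);
}
```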
########## File path: src/operator/nn/mkldnn/mkldnn_softmax.cc ##########
@@ -23,189 +23,211 @@
-  // only supports ndim = 1, 2, 3, 4 for now
-  return (ndim >= 1 && ndim <= 4);
+  // only supports up to 6 ndim

Review comment:
   `// supports ndim up to 6`
########## File path: src/operator/nn/mkldnn/mkldnn_softmax.cc ##########
@@ -23,189 +23,211 @@
+void MKLDNNSoftmaxForward(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const NDArray& in_data,
+                          const OpReqType& req,
+                          const NDArray& out_data) {
+  if (req == kNullOp)
+    return;
+  // same as the FCompute path, softmax only supports kWriteTo and kWriteInplace for now.
+  CHECK_NE(req, kAddTo);

-  const mkldnn::softmax_forward& GetFwd() const {
-    return *fwd_;
+  const auto& param = nnvm::get<SoftmaxParam>(attrs.parsed);
+  if (param.temperature.has_value()) {
+    TmpMemMgr::Get()->Init(ctx.requested[0]);
   }

- private:
-  std::shared_ptr<mkldnn::softmax_forward> fwd_;
-};
+  const bool is_train = ctx.is_train;
+  const auto tensors  = MKLDNNSoftmaxFwd::Tensors(in_data, out_data);
+  const auto& fwd     = MKLDNNSoftmaxFwd::GetCached(param, tensors, is_train);
+  fwd.Execute(tensors);
+}

 typedef ParamOpSign<SoftmaxParam> MKLDNNSoftmaxSignature;
-
-static MKLDNNSoftmaxFwd& GetSoftmaxFwd(const SoftmaxParam& param,
-                                       const int real_axis,
-                                       const bool is_train,
-                                       const NDArray& data,
-                                       const NDArray& output) {
+MKLDNNSoftmaxFwd& MKLDNNSoftmaxFwd::GetCached(const SoftmaxParam& param,
+                                              const Tensors& tensors,
+                                              const bool is_train) {
 #if DMLC_CXX11_THREAD_LOCAL
   static thread_local std::unordered_map<MKLDNNSoftmaxSignature, MKLDNNSoftmaxFwd, OpHash> fwds;
 #else
   static MX_THREAD_LOCAL std::unordered_map<MKLDNNSoftmaxSignature, MKLDNNSoftmaxFwd, OpHash> fwds;
 #endif

   MKLDNNSoftmaxSignature key(param);
-  key.AddSign(real_axis);
+  const float temperature = param.temperature.has_value() ? param.temperature.value() : 1.0f;
+  const int axis          = CheckAxis(param.axis, tensors.data.shape().ndim());
+  key.AddSign(axis);
   key.AddSign(is_train);
-  key.AddSign(data);
-  key.AddSign(output);
+  key.AddSign(temperature);
+  key.AddSign(tensors.data);
+  key.AddSign(tensors.out);

   auto it = fwds.find(key);
   if (it == fwds.end()) {
-    MKLDNNSoftmaxFwd fwd(is_train, real_axis, *(data.GetMKLDNNData()));
+    MKLDNNSoftmaxFwd fwd(param, tensors, is_train);
     it = AddToCache(&fwds, key, fwd);
   }
   return it->second;
 }

-void MKLDNNSoftmaxForward(const nnvm::NodeAttrs& attrs,
-                          const OpContext& ctx,
-                          const NDArray& in_data,
-                          const OpReqType& req,
-                          const NDArray& out_data) {
-  if (req == kNullOp)
-    return;
-  // same as the FCompute path, softmax only supports kWriteTo and kWriteInplace for now.
-  CHECK_NE(req, kAddTo);
+softmax_fwd_pd_t MKLDNNSoftmaxFwd::GetSoftmaxFwdPd(const mkldnn::memory& input_mem,
+                                                   const int axis,
+                                                   const bool is_train) {
+  const auto data_md    = input_mem.get_desc();
+  const auto cpu_engine = CpuEngine::Get()->get_engine();
+  const auto prop =
+      is_train ? mkldnn::prop_kind::forward_training : mkldnn::prop_kind::forward_scoring;
+  const auto desc = mkldnn::softmax_forward::desc(prop, data_md, axis);
+  return softmax_fwd_pd_t(desc, cpu_engine);
+}

-  const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
-  int axis = CheckAxis(param.axis, in_data.shape().ndim());
-  auto fwd = GetSoftmaxFwd(param, axis, ctx.is_train, in_data, out_data);
+linear_pd_t MKLDNNSoftmaxFwd::GetTemperaturePd(const mkldnn::memory& input_mem,
+                                               const float temperature) {
+  const auto data_md    = input_mem.get_desc();
+  const auto cpu_engine = CpuEngine::Get()->get_engine();
+  const auto desc       = mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_scoring,
+                                                        mkldnn::algorithm::eltwise_linear,
+                                                        data_md,
+                                                        1.0f / temperature,
+                                                        0.0f);
+  return linear_pd_t(desc, cpu_engine);
+}

-  auto in_mem  = in_data.GetMKLDNNData();
-  auto out_mem = out_data.GetMKLDNNData(fwd.pd.dst_desc());
+void MKLDNNSoftmaxFwd::Execute(const Tensors& tensors) const {
   MKLDNNStream* stream = MKLDNNStream::Get();
-  stream->RegisterPrimArgs(fwd.GetFwd(), {{MKLDNN_ARG_SRC, *in_mem}, {MKLDNN_ARG_DST, *out_mem}});
+
+  auto original_input_mem = tensors.data.GetMKLDNNData();
+  const auto out_mem      = tensors.out.GetMKLDNNData(softmax_pd->dst_desc());
+
+  mkldnn::memory* softmax_input_mem;
+  if (temperature_pd) {  // temperature parameter used
+    // check whether additional buffer is needed
Review comment:
   Wouldn't this comment fit into one line? E.g. `check whether additional buffer is needed when the temperature parameter is being used`.
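Regarding the buffer this comment refers to: with a temperature set, the forward pass becomes two primitives, an eltwise_linear scale by 1 / T followed by the softmax, and the scaled values need somewhere to live. A plain-C++ sketch of that pipeline (illustrative names, not the actual oneDNN dispatch), reusing the destination buffer so no extra scratch memory is needed in this simple case:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the two-step pipeline used when `temperature` is set:
//   1) eltwise_linear: tmp = data * (1 / T)
//   2) softmax over the scaled values
// Here the scaled values are written into the destination buffer and the
// softmax then runs in place, so no additional buffer is required; the real
// code has to decide whether it can do the same or must allocate one.
void SoftmaxWithTemperaturePipeline(const std::vector<float>& data,
                                    float temperature,
                                    std::vector<float>* out) {
  // Step 1: linear scale straight into the destination buffer.
  out->resize(data.size());
  const float alpha = 1.0f / temperature;
  for (size_t i = 0; i < data.size(); ++i)
    (*out)[i] = data[i] * alpha;

  // Step 2: softmax in place over the scaled values
  // (max-subtraction for numerical stability).
  const float max_val = *std::max_element(out->begin(), out->end());
  float sum = 0.0f;
  for (float& v : *out) {
    v = std::exp(v - max_val);
    sum += v;
  }
  for (float& v : *out)
    v /= sum;
}

int main() {
  std::vector<float> out;
  SoftmaxWithTemperaturePipeline({1.0f, 2.0f, 3.0f}, 2.0f, &out);
  for (float v : out)
    std::printf("%f\n", v);
}
```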
