bartekkuncer commented on a change in pull request #20759:
URL: https://github.com/apache/incubator-mxnet/pull/20759#discussion_r760347201
##########
File path: src/operator/nn/mkldnn/mkldnn_rnn.cc
##########
@@ -586,6 +613,7 @@ void MKLDNNRnnForward::SetWeightsMem(MKLDNNRnnMemMgr* mgr,
ConcatWeights(*weights_iter_r_, 1, {l2r_wh, r2l_wh}, format_tag::ldgoi);
} else if (param_.num_layer == 1 && !param_.bidirectional) {
//* single uni-directional layer, no concatenate operator needed
+ // tutttaj
Review comment:
I believe this line of comment is unnecessary.
##########
File path: src/operator/nn/mkldnn/mkldnn_rnn.cc
##########
@@ -437,35 +472,27 @@ void MKLDNNRnnForward::SetNewDataMem(void* x,
}
}
-inline void MKLDNNMemoryReorder(const mkldnn::memory& src, const mkldnn::memory& dst) {
-#if DMLC_CXX11_THREAD_LOCAL
- static thread_local std::unordered_map<OpSignature, mkldnn::reorder, OpHash> reorderPrimitives;
-#else
- static MX_THREAD_LOCAL std::unordered_map<OpSignature, mkldnn::reorder, OpHash> reorderPrimitives;
-#endif
- OpSignature key{};
- key.AddSign(src);
- key.AddSign(dst);
-
- auto it = reorderPrimitives.find(key);
- if (it == reorderPrimitives.end()) {
- auto reorder = mkldnn::reorder(src, dst);
- it = AddToCache(&reorderPrimitives, key, reorder);
- }
-
- mkldnn_args_map_t net_args;
- net_args.emplace(MKLDNN_ARG_SRC, src);
- net_args.emplace(MKLDNN_ARG_DST, dst);
- MKLDNNStream::Get()->RegisterPrimArgs(it->second, net_args);
-}
-
/*
* Reorder the concatenated weights memory to a efficient memory block
* with primitive-prefered format.
*/
void MKLDNNRnnForward::ReorderWeights() {
- MKLDNNMemoryReorder(*weights_layer_r_, *weights_layer_);
- MKLDNNMemoryReorder(*weights_iter_r_, *weights_iter_);
+ if (param_.quantized) {
+ const mkldnn::primitive_attr& attr = this->fwd_inf_.GetPrimAttr();
+ auto ReorderWithAttr = [&](mkldnn::memory& src, mkldnn::memory& dst) {
+ auto reorder_pd = mkldnn::reorder::primitive_desc(src, dst, attr);
+ mkldnn_args_map_t net_args;
+ net_args[MKLDNN_ARG_SRC] = src;
+ net_args[MKLDNN_ARG_DST] = dst;
+ MKLDNNStream::Get()->RegisterPrimArgs(mkldnn::reorder(reorder_pd), net_args);
+ };
+ ReorderWithAttr(*weights_layer_r_, *weights_layer_);
+ ReorderWithAttr(*weights_iter_r_, *weights_iter_);
+ } else {
+ MKLDNNMemoryReorder(*weights_layer_r_, *weights_layer_);
+ MKLDNNMemoryReorder(*weights_iter_r_, *weights_iter_);
+ // if (param_.proj_size > 0) MKLDNNMemoryReorder(*weights_proj_r_, *weights_proj_);
Review comment:
Why is this here? Is it a TODO?
##########
File path: src/operator/nn/mkldnn/mkldnn_rnn.cc
##########
@@ -1047,6 +1075,12 @@ void MKLDNNRnnOp::Forward(const OpContext& ctx,
weights_version_ = inputs[rnn_enum::kParams].version();
}
+ if (dmlc::GetEnv("MXNET_RNN_USE_WEIGHT_CACHE", 0) && !initialized_) {
+ LOG(INFO) << "The current weight of RNN is assumed to be fixed and cached
during "
+ "the whole inference pipeline. Please set
MXNET_RNN_USE_WEIGHT_CACHE=0, if "
+ "the weight changed at runtime.";
Review comment:
"... is going to be changed (modified) at runtime." ?
##########
File path: src/operator/quantization/mkldnn/mkldnn_quantized_rnn.cc
##########
@@ -0,0 +1,373 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_quantized_rnn.cc
+ * \brief Common functions for quantized recurrent neural network
+ * \author Zixuan Wei
+ */
+
+#if MXNET_USE_MKLDNN == 1
+
+#include "../quantization_utils.h"
+#include "./mkldnn_quantized_rnn-inl.h"
+
+namespace mxnet {
+namespace op {
+
+std::vector<float>
+GetMKLDNNRnnWeightsQParams(const MKLDNNRnnFullParam &full_param, float *w_ptr) {
+ const int nthreads =
+ mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+ const RNNParam &default_param = full_param.default_param;
+ const LayerParamVector &layer_params = full_param.layer_params;
+
+ const MKLDNNRnnLayerParam &layer_param0 = layer_params.at(0);
+ const size_t w_size0 = layer_param0.single_w_size;
+ const size_t wx_size0 = 4 * layer_param0.state_size * layer_param0.input_size;
+ const size_t wh_size0 = 4 * layer_param0.state_size * layer_param0.state_size;
+
+ int directions = 1;
+ float *wx = w_ptr;
+ float *wh = wx + wx_size0;
+ float *fake_wx = wx;
+ float *fake_wh = wh;
+
+ std::vector<float> wx_goi_max;
+ std::vector<float> wh_goi_max;
+ if (default_param.bidirectional) {
+ directions = 2;
+ wx_goi_max.resize(wx_size0);
+ wh_goi_max.resize(wh_size0);
+ fake_wx = wx_goi_max.data();
+ fake_wh = wh_goi_max.data();
+#pragma omp parallel for num_threads(nthreads)
+ for (index_t i = 0; i < static_cast<index_t>(wx_size0); ++i) {
+ fake_wx[i] = MaxAbs(wx[i], wx[i + w_size0]);
+ }
+#pragma omp parallel for num_threads(nthreads)
+ for (index_t i = 0; i < static_cast<index_t>(wh_size0); ++i) {
+ fake_wh[i] = MaxAbs(wh[i], wh[i + w_size0]);
+ }
+ }
+ std::vector<float> w_max(4 * layer_param0.state_size, 0.0);
+ const index_t input_size = layer_param0.input_size; // input
+ const index_t state_size = layer_param0.state_size; // state
+ const index_t gates_nblks = 4 * layer_param0.state_size; // gates * state
+ for (index_t go = 0; go < gates_nblks; ++go) {
+ float tmp_max = w_max[go];
+ for (index_t i = 0; i < input_size; ++i) {
+ tmp_max = MaxAbs(fake_wx[go * input_size + i], tmp_max);
+ }
+ for (index_t i = 0; i < state_size; ++i) {
+ tmp_max = MaxAbs(fake_wh[go * state_size + i], tmp_max);
+ }
+ w_max[go] = tmp_max;
+ }
+ wx += layer_param0.single_w_size * directions;
+ wh += layer_param0.single_w_size * directions;
+
+ std::vector<float> goi_max(wh_size0, 0.0);
+ for (size_t lyr = 1; lyr < layer_params.size(); ++lyr) {
+ const MKLDNNRnnLayerParam &layer_param = layer_params.at(lyr);
+ const int weight_nblks = layer_param.num_layer * directions;
+ for (int blk = 0; blk < weight_nblks; ++blk) {
+#pragma omp parallel for num_threads(nthreads)
+ for (index_t i = 0; i < static_cast<index_t>(wh_size0); ++i) {
+ goi_max[i] = MaxAbs(wx[i], wh[i]);
+ }
+ for (index_t go = 0; go < gates_nblks; ++go) {
+ float tmp = w_max[go];
+//* NOTES: min/max reductions were supported since OpenMP 3.1, which was
+// released in
Review comment:
Please fix this comment; it is cut off mid-sentence (OpenMP 3.1 was released in July 2011).
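For reference (purely illustrative, not code from this PR; all names below are made up for the example), the OpenMP 3.1 max reduction that the note seems to refer to looks roughly like this:

#include <omp.h>

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Arbitrary sample weights, just to have something to reduce over.
  std::vector<float> weights(1024);
  for (int i = 0; i < static_cast<int>(weights.size()); ++i)
    weights[i] = std::sin(static_cast<float>(i));

  float max_abs = 0.0f;
  // reduction(max : ...) requires OpenMP >= 3.1; each thread keeps a private
  // maximum and OpenMP combines them at the end, so no manual per-thread
  // bookkeeping is needed.
#pragma omp parallel for reduction(max : max_abs)
  for (int i = 0; i < static_cast<int>(weights.size()); ++i) {
    max_abs = std::max(max_abs, std::fabs(weights[i]));
  }
  std::printf("max |w| = %f\n", max_abs);
  return 0;
}

Whether you keep the manual MaxAbs loop or switch to the reduction clause is up to you; mainly the comment should not end mid-sentence.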
##########
File path: src/operator/quantization/quantize_graph_pass.cc
##########
@@ -249,8 +248,12 @@ static void MarkQuantizedNodes(const Graph& src,
Graph QuantizeGraph(Graph &&src) {
static const auto& flist_outputs =
nnvm::Op::GetAttr<nnvm::FListOutputNames>("FListOutputNames");
static const auto& need_requantize_map =
Op::GetAttr<mxnet::FNeedRequantize>("FNeedRequantize");
Review comment:
Maybe clang-format the whole file? Or at least the lines around the code you changed/added?
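For example, clang-format -i src/operator/quantization/quantize_graph_pass.cc would reformat the whole file in place, and git clang-format (if you have it available) can limit the formatting to the lines touched by your change.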