bgawrych commented on a change in pull request #20724:
URL: https://github.com/apache/incubator-mxnet/pull/20724#discussion_r746318695



##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
 #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
 #if MXNET_USE_ONEDNN == 1
 
+#include <memory>
 #include <set>
 #include <string>
 #include <vector>
 
 #include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
 #include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
 #include "../common.h"
 #include "dnnl_conv-inl.h"
 #include "dnnl_subgraph_base-inl.h"
 
 namespace mxnet {
 namespace op {
 
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op = {"_contrib_quantized_elemwise_add",
+                                                     "_contrib_quantized_elemwise_mul",
+                                                     "_contrib_quantized_npi_add",
+                                                     "_sg_onednn_conv",
+                                                     "_sg_onednn_fully_connected",
+                                                     "_sg_onednn_selfatt_qk",
+                                                     "_sg_onednn_selfatt_valatt",
+                                                     "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
  public:
   /*! \brief pattern match status */
   enum SelectStatus {
     kFail = 0,
     kStart,
+    kRequantize,
     kSuccess,
   };
 
  private:
+  bool disable_fuse_all;
+  bool disable_float_output;
   SelectStatus status;
-  std::vector<const nnvm::Node*> matched_list;
+  std::vector<const BiDirectedNode*> matched_list;
   std::set<std::string> support_requantize_fusion_op_name;
 
  public:
-  SgDNNLPostQuantizeSelector() {
-    support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-    support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
-    support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+  explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool dis_float_output)
+      : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output) {
+    support_requantize_fusion_op_name = support_req_fusion_op;
   }
 
-  bool Select(const nnvm::Node& n) override {
-    if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {
-      if (n.op() == Op::Get("_sg_onednn_conv")) {
-        auto const& param = nnvm::get<DNNLConvFusionParam>(n.attrs.parsed);
-        if (param.full_conv_param.dnnl_param.quantized) {
-          status = kStart;
-          matched_list.clear();
-          matched_list.push_back(&n);
-          return true;
-        }
-      } else if (n.op()->name == "_contrib_quantized_elemwise_add" ||
-                 n.op()->name == "_contrib_quantized_npi_add") {
-        status = kStart;
-        matched_list.clear();
-        matched_list.push_back(&n);
-        return true;
-      }
+  bool Select(const BiDirectedNode& n) override {
+    const nnvm::Node* raw_node = n.node;
+    if ((!disable_fuse_all) && raw_node->op() &&
+        support_requantize_fusion_op_name.count(raw_node->op()->name)) {
+      status = kStart;
+      matched_list.clear();
+      matched_list.push_back(&n);
+      return true;
     }
     return false;
   }
 
-  bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {
+  bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {
     return false;
   }
 
-  bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {
-    if (status == kFail || status == kSuccess || new_node.is_variable())
+  bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override {
+    const nnvm::Node* raw_node     = n.node;
+    const nnvm::Node* raw_new_node = new_node.node;
+    if (status == kFail || status == kSuccess || raw_new_node->is_variable())
       return false;
     // If n isn't the last matched node, then we encountered an internal
     // branch, we should pop out the node behind n and stop fusion.
     if (matched_list.back() != &n) {
-      status = kFail;
+      if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) {
+        while (matched_list.back() != &n) {
+          matched_list.pop_back();
+        }
+      }
+      status = kSuccess;
       return false;
     }
-    if (new_node.op()->name == "_contrib_requantize") {
-      auto const& param = nnvm::get<RequantizeParam>(new_node.attrs.parsed);
-      if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
-        matched_list.push_back(&new_node);
+
+    switch (status) {
+      case kStart:
+        if (raw_new_node->op() == Op::Get("_contrib_requantize")) {
+          auto const& param = nnvm::get<RequantizeParam>(raw_new_node->attrs.parsed);
+          if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
+            matched_list.push_back(&new_node);
+            status = kRequantize;
+            if (raw_node->op() == Op::Get("_sg_onednn_conv")) {
+              status = kSuccess;
+            }
+            return true;
+          }
+        }
+      case kRequantize:
+        if (!disable_float_output && raw_new_node->op() == Op::Get("_contrib_dequantize")) {
+          CHECK(raw_node->op() == Op::Get("_contrib_requantize"));
+          if (n.outputs.size() > 1) {
+            // Check whether requantize has outputs other than dequantize;
+            // if it does, we can't fuse dequantize.
+            for (auto kv : n.outputs) {
+              const auto& node = kv.first;
+              if (node->op() != Op::Get("_contrib_dequantize")) {
+                status = kSuccess;
+                return false;
+              }
+            }
+          }
+          matched_list.push_back(&new_node);
+          status = kSuccess;
+          return true;
+        }
+      default:
         status = kSuccess;
-        return true;
-      } else {
-        status = kFail;
-      }
+        return false;
     }
-    return false;
   }
 
-  std::vector<nnvm::Node*> Filter(const std::vector<nnvm::Node*>& candidates) override {
-    if (status != kSuccess) {
-      return std::vector<nnvm::Node*>(0);
+  std::vector<BiDirectedNode*> Filter(const std::vector<BiDirectedNode*>& candidates) override {
+    if (status != kSuccess || (matched_list.size() <= 1)) {
+      return std::vector<BiDirectedNode*>(0);
     } else {
-      return candidates;
+      std::vector<BiDirectedNode*> ret;
+      for (auto i : matched_list) {
+        auto non_const_i = const_cast<BiDirectedNode*>(i);
+        if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) {
+          ret.push_back(non_const_i);
+        }
+      }
+      return ret;
     }
   }
 
   void Reset() override {
     CHECK_GE(matched_list.size(), 1);
-    auto new_selector = SgDNNLPostQuantizeSelector();
+    auto new_selector = SgDNNLPostQuantizeSelector(disable_fuse_all, disable_float_output);
     new_selector.Select(*matched_list[0]);
     *this = new_selector;
   }
 };
 
 class SgDNNLPostQuantizeProperty : public SubgraphProperty {
+ private:
+  bool disable_fuse_all;
+  bool disable_float_output;
+  std::set<std::string> support_requantize_fusion_op_name;
+
  public:
   SgDNNLPostQuantizeProperty() {
-    support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-    support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
-    support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+    disable_fuse_all                  = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_FUSE_ALL", false);

Review comment:
       What about changing this flag to something less general? This flag doesn't disable all fusions, only the ones in this file, so names like "MXNET_DISABLE_ONEDNN_FUSE_REQUANTIZE" and "MXNET_DISABLE_ONEDNN_FUSE_DEQUANTIZE" would be better. We could also negate the default values and call them "MXNET_ONEDNN_FUSE_REQUANTIZE" and "MXNET_ONEDNN_FUSE_DEQUANTIZE".
   +@anko-intel
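
   A minimal sketch of how the negated variant could read in the constructor, assuming the requantize flag maps onto `disable_fuse_all` and the dequantize flag onto `disable_float_output` (that mapping is my assumption, it isn't visible in this hunk):

```cpp
SgDNNLPostQuantizeProperty() {
  // Positive flags, enabled by default: fusion is only skipped when the user
  // explicitly sets e.g. MXNET_ONEDNN_FUSE_REQUANTIZE=0 in the environment.
  disable_fuse_all     = !dmlc::GetEnv("MXNET_ONEDNN_FUSE_REQUANTIZE", true);
  disable_float_output = !dmlc::GetEnv("MXNET_ONEDNN_FUSE_DEQUANTIZE", true);
  support_requantize_fusion_op_name = support_req_fusion_op;
}
```

   That way both fusions stay on out of the box and each variable reads as a capability switch rather than a double negative.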




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

