mozga-intel commented on a change in pull request #20724:
URL: https://github.com/apache/incubator-mxnet/pull/20724#discussion_r746428664
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
Review comment:
See above
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
Review comment:
[Future consideration] + @anko-intel
```suggestion
#include "./dnnl_convolution-inl.h"
```
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
Review comment:
[Future consideration] It would be better to replace this include with a project-root-relative path:
```suggestion
#include "operator/nn/fully_connected-inl.h"
```
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
Review comment:
This global set is likely instantiated in every translation unit (TU) that includes this header. What is the advantage of defining it at this scope?
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
kFail = 0,
kStart,
+ kRequantize,
kSuccess,
};
private:
+ bool disable_fuse_all;
+ bool disable_float_output;
SelectStatus status;
- std::vector<const nnvm::Node*> matched_list;
+ std::vector<const BiDirectedNode*> matched_list;
std::set<std::string> support_requantize_fusion_op_name;
public:
- SgDNNLPostQuantizeSelector() {
- support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
- support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool
dis_float_output)
+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output)
{
+ support_requantize_fusion_op_name = support_req_fusion_op;
Review comment:
What are the pros and cons of this assignment? In other words, why is the set defined globally and then implicitly copied into the member here?
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
kFail = 0,
kStart,
+ kRequantize,
kSuccess,
};
private:
+ bool disable_fuse_all;
+ bool disable_float_output;
SelectStatus status;
- std::vector<const nnvm::Node*> matched_list;
+ std::vector<const BiDirectedNode*> matched_list;
std::set<std::string> support_requantize_fusion_op_name;
public:
- SgDNNLPostQuantizeSelector() {
- support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
- support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool
dis_float_output)
+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output)
{
+ support_requantize_fusion_op_name = support_req_fusion_op;
}
- bool Select(const nnvm::Node& n) override {
- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {
- if (n.op() == Op::Get("_sg_onednn_conv")) {
- auto const& param = nnvm::get<DNNLConvFusionParam>(n.attrs.parsed);
- if (param.full_conv_param.dnnl_param.quantized) {
- status = kStart;
- matched_list.clear();
- matched_list.push_back(&n);
- return true;
- }
- } else if (n.op()->name == "_contrib_quantized_elemwise_add" ||
- n.op()->name == "_contrib_quantized_npi_add") {
- status = kStart;
- matched_list.clear();
- matched_list.push_back(&n);
- return true;
- }
+ bool Select(const BiDirectedNode& n) override {
+ const nnvm::Node* raw_node = n.node;
+ if ((!disable_fuse_all) && raw_node->op() &&
+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {
+ status = kStart;
+ matched_list.clear();
+ matched_list.push_back(&n);
+ return true;
}
return false;
}
- bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override {
+ bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node)
override {
return false;
}
- bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override {
- if (status == kFail || status == kSuccess || new_node.is_variable())
+ bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node)
override {
+ const nnvm::Node* raw_node = n.node;
+ const nnvm::Node* raw_new_node = new_node.node;
+ if (status == kFail || status == kSuccess || raw_new_node->is_variable())
return false;
// If n isn't the last matched node, then we encoutered a internal
// branch, we should pop out the node behind n and stop fusion.
if (matched_list.back() != &n) {
- status = kFail;
+ if (std::find(matched_list.begin(), matched_list.end(), &n) !=
matched_list.end()) {
+ while (matched_list.back() != &n) {
+ matched_list.pop_back();
+ }
+ }
+ status = kSuccess;
return false;
}
- if (new_node.op()->name == "_contrib_requantize") {
- auto const& param = nnvm::get<RequantizeParam>(new_node.attrs.parsed);
- if (param.min_calib_range.has_value() &&
param.max_calib_range.has_value()) {
- matched_list.push_back(&new_node);
+
+ switch (status) {
+ case kStart:
+ if (raw_new_node->op() == Op::Get("_contrib_requantize")) {
+ auto const& param =
nnvm::get<RequantizeParam>(raw_new_node->attrs.parsed);
+ if (param.min_calib_range.has_value() &&
param.max_calib_range.has_value()) {
+ matched_list.push_back(&new_node);
+ status = kRequantize;
+ if (raw_node->op() == Op::Get("_sg_onednn_conv")) {
+ status = kSuccess;
+ }
+ return true;
+ }
+ }
+ case kRequantize:
+ if (!disable_float_output && raw_new_node->op() ==
Op::Get("_contrib_dequantize")) {
+ CHECK(raw_node->op() == Op::Get("_contrib_requantize"));
+ if (n.outputs.size() > 1) {
+ // check if requantize have other outputs than dequantize
+ // if it has we can't fuse dequantize
+ for (auto kv : n.outputs) {
Review comment:
How about making `kv` const? If the container is const, then `kv` should be const as well; taking it by const reference also avoids copying each element:
```suggestion
        for (const auto& kv : n.outputs) {
```
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
Review comment:
[+] It could be worthwhile to replace the plain enum with an `enum class`, to avoid implicit conversions to other types (such as another enum or int).
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
Review comment:
Why is the enum (`enum SelectStatus`) public?
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
kFail = 0,
kStart,
+ kRequantize,
kSuccess,
};
private:
+ bool disable_fuse_all;
+ bool disable_float_output;
SelectStatus status;
- std::vector<const nnvm::Node*> matched_list;
+ std::vector<const BiDirectedNode*> matched_list;
std::set<std::string> support_requantize_fusion_op_name;
public:
- SgDNNLPostQuantizeSelector() {
- support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
- support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool
dis_float_output)
+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output)
{
+ support_requantize_fusion_op_name = support_req_fusion_op;
}
- bool Select(const nnvm::Node& n) override {
- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {
- if (n.op() == Op::Get("_sg_onednn_conv")) {
- auto const& param = nnvm::get<DNNLConvFusionParam>(n.attrs.parsed);
- if (param.full_conv_param.dnnl_param.quantized) {
- status = kStart;
- matched_list.clear();
- matched_list.push_back(&n);
- return true;
- }
- } else if (n.op()->name == "_contrib_quantized_elemwise_add" ||
- n.op()->name == "_contrib_quantized_npi_add") {
- status = kStart;
- matched_list.clear();
- matched_list.push_back(&n);
- return true;
- }
+ bool Select(const BiDirectedNode& n) override {
+ const nnvm::Node* raw_node = n.node;
+ if ((!disable_fuse_all) && raw_node->op() &&
+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {
+ status = kStart;
+ matched_list.clear();
+ matched_list.push_back(&n);
Review comment:
Since clearing the vector here keeps its previous capacity, it would read better to use `matched_list.emplace_back(&n)`, which constructs the element in place instead of creating a temporary object first. Also, if you know in advance how many values will be stored, you can `reserve()` memory up front to avoid the repeated capacity doublings of incremental growth.
How about using:
```suggestion
      matched_list.emplace_back(&n);
```
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
kFail = 0,
kStart,
+ kRequantize,
kSuccess,
};
private:
+ bool disable_fuse_all;
Review comment:
[Consideration] Consider using in-class default member initialization, e.g.:
```suggestion
  bool disable_fuse_all{false};  // or true, whichever default is appropriate
```
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
kFail = 0,
kStart,
+ kRequantize,
kSuccess,
};
private:
+ bool disable_fuse_all;
+ bool disable_float_output;
SelectStatus status;
- std::vector<const nnvm::Node*> matched_list;
+ std::vector<const BiDirectedNode*> matched_list;
std::set<std::string> support_requantize_fusion_op_name;
public:
- SgDNNLPostQuantizeSelector() {
- support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
- support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool
dis_float_output)
Review comment:
What are the pros of using `const bool` here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]