mozga-intel commented on a change in pull request #20724:
URL: https://github.com/apache/incubator-mxnet/pull/20724#discussion_r746524936
##########
File path: src/operator/subgraph/dnnl/dnnl_post_quantize_property.h
##########
@@ -20,146 +20,209 @@
#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_ONEDNN == 1
+#include <memory>
#include <set>
#include <string>
#include <vector>
#include "../../nn/dnnl/dnnl_convolution-inl.h"
+#include "../../nn/fully_connected-inl.h"
#include "../../quantization/requantize-inl.h"
+#include "../../tensor/elemwise_binary_op-inl.h"
#include "../common.h"
#include "dnnl_conv-inl.h"
#include "dnnl_subgraph_base-inl.h"
namespace mxnet {
namespace op {
-class SgDNNLPostQuantizeSelector : public SubgraphSelector {
+const std::set<std::string> support_req_fusion_op =
{"_contrib_quantized_elemwise_add",
+
"_contrib_quantized_elemwise_mul",
+
"_contrib_quantized_npi_add",
+ "_sg_onednn_conv",
+
"_sg_onednn_fully_connected",
+ "_sg_onednn_selfatt_qk",
+
"_sg_onednn_selfatt_valatt",
+ "_sg_onednn_batch_dot"};
+
+class SgDNNLPostQuantizeSelector : public SubgraphSelectorV2 {
public:
/*! \brief pattern match status */
enum SelectStatus {
kFail = 0,
kStart,
+ kRequantize,
kSuccess,
};
private:
+ bool disable_fuse_all;
+ bool disable_float_output;
SelectStatus status;
- std::vector<const nnvm::Node*> matched_list;
+ std::vector<const BiDirectedNode*> matched_list;
std::set<std::string> support_requantize_fusion_op_name;
public:
- SgDNNLPostQuantizeSelector() {
- support_requantize_fusion_op_name.insert("_sg_onednn_conv");
-
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
- support_requantize_fusion_op_name.insert("_contrib_quantized_npi_add");
+ explicit SgDNNLPostQuantizeSelector(const bool dis_fuse_all, const bool
dis_float_output)
+ : disable_fuse_all(dis_fuse_all), disable_float_output(dis_float_output)
{
+ support_requantize_fusion_op_name = support_req_fusion_op;
}
- bool Select(const nnvm::Node& n) override {
- if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {
- if (n.op() == Op::Get("_sg_onednn_conv")) {
- auto const& param = nnvm::get<DNNLConvFusionParam>(n.attrs.parsed);
- if (param.full_conv_param.dnnl_param.quantized) {
- status = kStart;
- matched_list.clear();
- matched_list.push_back(&n);
- return true;
- }
- } else if (n.op()->name == "_contrib_quantized_elemwise_add" ||
- n.op()->name == "_contrib_quantized_npi_add") {
- status = kStart;
- matched_list.clear();
- matched_list.push_back(&n);
- return true;
- }
+ bool Select(const BiDirectedNode& n) override {
+ const nnvm::Node* raw_node = n.node;
+ if ((!disable_fuse_all) && raw_node->op() &&
+ support_requantize_fusion_op_name.count(raw_node->op()->name)) {
+ status = kStart;
+ matched_list.clear();
+ matched_list.push_back(&n);
Review comment:
Since we only clear the vector here, its capacity stays the same as before. Given that,
it seems better to use `matched_list.emplace_back(&n)`, which constructs the element
in place and avoids creating a temporary object first.
How about using:
```suggestion
matched_list.emplace_back(&n);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]