masahi commented on a change in pull request #7474:
URL: https://github.com/apache/tvm/pull/7474#discussion_r578911395



##########
File path: python/tvm/relay/transform/quantize/_quantizer_patterns.py
##########
@@ -0,0 +1,712 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Patterns to quantize and how to quantize them."""
+
+import tvm
+from tvm import relay
+
+from tvm.relay.transform.quantize import CalibrationCallback
+from tvm.relay.dataflow_pattern import (
+    is_op,
+    wildcard,
+    is_constant,
+    DFPatternCallback,
+    _DFPatternCallback,
+)
+from tvm.relay.dataflow_pattern import ffi as pattern_ffi
+from tvm.relay.frontend.common import infer_type
+from tvm.relay.op.nn.utils import get_pad_tuple2d
+
+
class QuantizerPattern(DFPatternCallback):
    """DFPatternCallback to rewrite patterns as quantized. Also contains extra information
    used for quantization and calibration.

    Parameters
    ----------
    calibration_callback : CalibrationCallback
        The method we will use to calibrate the nn.conv2d pattern.
    """

    # Counts the number of times we've added a scale and zp for variable naming.
    # These need to be class-level attributes and not initialized in __init__ because
    # each scale and zero point must be unique, even if they are created by different
    # instances.
    scales_count = 0
    zp_count = 0

    def __init__(self, calibration_callback: CalibrationCallback = None):
        super().__init__()
        self.calibration_callback = calibration_callback

    def calibrate_pattern(self, calibration_info):
        """Calculates the scale and zero points for quantizing parts of a generic pattern. By
        default, we call the calibrate_pattern method of the CalibrationCallback object that is
        passed into QuantizerPattern during initialization. However, if you want a pattern
        specific quantization method or a per-channel quantization method, you should overwrite
        the QuantizerPattern's calibrate_pattern method.

        Parameters
        ----------
        calibration_info : CalibrationInfo
            The class containing relevant information and utility functions to calibrate one
            instance of a pattern.

        Returns
        -------
        scale_zp_map : Dictionary
            A map from the names of scales and zero point variables in this pattern to their
            values.
        """
        return self.calibration_callback.calibrate_pattern(calibration_info)

    def callback(self, pre, post, node_map):
        """Rewrites one matched instance of the pattern. Subclasses must implement this."""
        raise NotImplementedError

    def scale(self, name):
        """Helper to create the scale variable for qnn.quantize when rewriting our pattern.

        Parameters
        ----------
        name : str
            Identifier at the beginning of the scale variable.

        Returns
        -------
        var : relay.Var
            Relay variable for scale. If the input name is 'conv2d_data', then the name of the
            relay variable might be 'conv2d_data_scale_0'.
        """
        # The class-level counter guarantees a globally unique variable name.
        var = relay.var(
            f"{name}_scale_{QuantizerPattern.scales_count}", shape=(), dtype="float32"
        )
        QuantizerPattern.scales_count += 1
        return var

    def zero_point(self, name):
        """Helper to create the zero point variable for qnn.quantize when rewriting our
        pattern.

        Parameters
        ----------
        name : str
            Identifier at the beginning of the variable.

        Returns
        -------
        var : relay.Var
            Relay variable for the zero point. If the input name is 'conv2d_data', then the
            name of the relay variable might be 'conv2d_data_zero_pt_0'.
        """
        var = relay.var(
            f"{name}_zero_pt_{QuantizerPattern.zp_count}", shape=(), dtype="int32"
        )
        QuantizerPattern.zp_count += 1
        return var

    def create_scale_zps(self, left_name, right_name):
        """Helper to create scales and zero points for binops.

        Stores the new variables in self.scale_zps as
        [left_scale, left_zero_point, right_scale, right_zero_point].

        Parameters
        ----------
        left_name : str
            Identifier of the left hand side scale and zero point.

        right_name : str
            Identifier of the right hand side scale and zero point.
        """
        data_scale = self.scale(left_name)
        data_zp = self.zero_point(left_name)
        weight_scale = self.scale(right_name)
        weight_zp = self.zero_point(right_name)
        self.scale_zps = [data_scale, data_zp, weight_scale, weight_zp]
+
+
+class Conv2DPattern(QuantizerPattern):
+    """Pattern to rewrite nn.conv2d ops as qnn.conv2d ops.
+
+    Parameters
+    ----------
+    calibration_callback : CalibrationCallback
+        The method we will use to calibrate this pattern.
+    """
+    def __init__(self, calibration_callback: CalibrationCallback = None):
+        super().__init__(calibration_callback)
+        self.input = wildcard()
+        self.conv_weight = wildcard()
+        self.inputs = [self.input, self.conv_weight]
+        self.conv2d = is_op("nn.conv2d")(self.input, self.conv_weight)
+        self.pattern = self.conv2d
+        self.attrs = None
+        self.weight_channel_axis = None
+        self.data_channel_axis = None
+        self.channels = None
+
+    def get_kernel_size(self, kernel_shape, kernel_layout):
+        """Gets the size of the kernel.
+
+        Parameters
+        ----------
+        kernel_shape : NDArray
+            Shape of the kernel
+
+        kernel_layout : str
+            Layout of the kernel
+
+        Returns
+        -------
+            kernel_size : NDArray
+                Size of the kernel
+        """
+        if kernel_layout == "OIHW":
+            kernel_size = tuple(kernel_shape[2:4])
+        elif kernel_layout == "HWIO":
+            kernel_size = tuple(kernel_shape[0:2])
+        else:
+            raise ValueError(
+                "Quantizting kernel layout %s for conv2d is not yet supported."
+                + "Please use OIHW or HWIO",
+                kernel_layout,
+            )
+        return kernel_size
+
+    def get_attrs(self, attrs, kernel_shape):
+        """Constructs the attributes for qnn.conv2d.
+
+        Parameters
+        ----------
+        attrs : dict
+            Attributes of the original nn.conv2d
+
+        kernel_shape : NDArray
+            Shape of the kernel
+
+        Returns
+        -------
+            quantized_attrs : dict
+                Attributes for the qnn.conv2d
+        """
+        new_attr_dict = {}
+        self.kernel_layout = attrs["kernel_layout"]
+        data_layout = attrs["data_layout"]
+
+        if self.kernel_layout == "OIHW":
+            self.weight_channel_axis = 0
+        elif self.kernel_layout == "HWIO":
+            self.weight_channel_axis = 3
+        else:
+            raise ValueError(
+                "Quantizing kernel layout %s for conv2d is not yet supported."
+                + "Please use OIHW or HWIO",
+                self.kernel_layout,
+            )
+
+        if data_layout == "NCHW":
+            self.data_channel_axis = 1
+        elif data_layout == "NHWC":
+            self.data_channel_axis = 3
+        else:
+            raise ValueError(
+                "Quantizing data layout %s for conv2d is not yet supported."
+                + "Please use NCHW or NHWC",
+                data_layout,
+            )
+
+        for attr in attrs.keys():
+            attr_value = attrs[attr]
+            if isinstance(attr_value, tvm.ir.container.Array):
+                attr_value = tuple(attr_value)
+            if attr == "kernel_size":
+                kernel_size = attrs[attr]
+                if kernel_size is None:
+                    kernel_size = self.get_kernel_size(self.kernel_layout, 
kernel_shape)
+                else:
+                    kernel_size = tuple([k.value for k in attrs[attr]])
+                new_attr_dict[attr] = kernel_size
+            elif attr == "channels":
+                self.channels = attrs[attr]
+                if self.channels is None:
+                    self.channels = kernel_shape[self.weight_channel_axis]
+                if isinstance(self.channels, tvm.tir.expr.IntImm):
+                    self.channels = self.channels.value
+                new_attr_dict[attr] = self.channels
+            elif attr == "padding":
+                # We don't need to put padding in attr dict because we 
explicitly construct padding
+                self.padding = attrs[attr]
+            else:
+                new_attr_dict[attr] = attr_value
+
+        new_attr_dict["out_dtype"] = "int32"
+        self.attrs = new_attr_dict
+
+    def quantize_args(self):
+        """Helper to quantize the arguments to the qnn.conv2d."""
+        quantized_data = relay.qnn.op.quantize(
+            self.args[0], self.scale_zps[0], self.scale_zps[1], 
axis=self.data_channel_axis
+        )
+        quantized_weight = relay.qnn.op.quantize(
+            self.args[1], self.scale_zps[2], self.scale_zps[3], 
axis=self.weight_channel_axis
+        )
+        self.quantized_args = [quantized_data, quantized_weight]
+
    def create_conv(self, args, node_map):
        """Creates the qnn.conv2d from already-quantized arguments and the attributes
        previously built by get_attrs (stored in self.attrs).

        Parameters
        ----------
        args : List[relay.Expr]
            Quantized arguments for the qnn.conv2d.

        node_map : tvm.ir.container.Map
            Node map from DFPatternCallback's callback. Unused here; presumably kept so
            subclasses can override with pattern-specific logic — confirm against callers.

        Returns
        -------
        q_conv2d : relay.Expr
            Quantized version of the pattern.
        """
        return relay.qnn.op.conv2d(*args, **self.attrs)
+
+    def callback(self, pre, post, node_map):
+        self.args = [node_map[i][0] for i in self.inputs]
+        conv2d = node_map[self.conv2d][0]
+
+        self.out_dtype = conv2d.checked_type.dtype
+
+        self.get_attrs(conv2d.attrs, 
infer_type(self.args[1]).checked_type.shape)
+
+        self.create_scale_zps("conv2d_data", "conv2d_weight")
+        self.quantize_args()
+
+        conv_scale = self.scale_zps[0] * self.scale_zps[2]  # data_scale * 
weight_scale
+
+        # Conv zp is zero since QNN deals with input zps for us
+        conv_zp = relay.const(0, dtype="int32")
+        # args = [quantized_data, quantized_weight, data_zp, weight_zp, 
data_scale, weight_scale]
+        args = self.quantized_args[0:2] + [self.scale_zps[i] for i in [1, 3, 
0, 2]]
+
+        if self.padding is not None:
+
+            top, left, bottom, right = [p.value for p in 
get_pad_tuple2d(self.padding)]
+            if self.kernel_layout == "OIHW":
+                pad_width = ((0, 0), (0, 0), (top, bottom), (left, right))
+            elif self.kernel_layout == "HWIO":
+                pad_width = (
+                    (top, bottom),
+                    (left, right),
+                    (0, 0),
+                    (0, 0),
+                )
+            pad_val = 0

Review comment:
       again, this is assuming that the zero point is always 0, so you are only 
supporting symmetric quantization here




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to