[GitHub] [tvm] lhutton1 commented on a change in pull request #9576: [microNPU] Mean legalization support

GitBox Mon, 29 Nov 2021 07:04:02 -0800


lhutton1 commented on a change in pull request #9576:
URL: https://github.com/apache/tvm/pull/9576#discussion_r757706657




##########
File path: python/tvm/relay/backend/contrib/ethosu/legalize.py
##########
@@ -831,6 +832,170 @@ def __call__(self, *args, **kwargs):
         pass
 
 
+class MeanRewriter(DFPatternCallback):
+    """Convert ethosu.mean composite functions to to an equivalent 
legalization:
+    - Case 1 (axis == [1, 2] and keepsdims == True):
+        ethosu_depthwise_conv2d + ethosu_binary_elementwise
+    - Case 2 (ifm qparams == ofm qparams): ethosu_pooling
+    - Case 3 (else): ethosu_depthwise_conv2d
+    """
+
+    def __init__(self):
+        super().__init__(require_type=True)
+        self.pattern = (
+            wildcard().has_attr({"Composite": 
ethosu_patterns.MeanParams.composite_name})
+        )(wildcard())
+
+    def callback(
+        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: 
tvm.ir.container.Map
+    ) -> tvm.relay.Expr:
+        params = ethosu_patterns.MeanParams(post.op.body)
+        params.ifm.tensor = post.args[0]
+
+        ifm_shape = params.ifm.shape
+        ofm_shape = params.ofm.shape
+        lut = relay.const([], "int8")
+        axis = params.axis
+        reduced_op = params.ifm.tensor
+
+        # Enforce 4d input
+        if len(ifm_shape) < 4:
+            axis = [x + 1 for x in axis]
+            if len(ifm_shape) == 3:
+                ifm_shape = [1, params.height, params.width, ifm_shape[2]]
+            else:
+                ifm_shape = [1, params.height, params.width, 1]
+            reduced_op = relay.reshape(reduced_op, ifm_shape)

Review comment:
       Vela doesn't support a 1D input 
(https://git.mlplatform.org/ml/ethos-u/ethos-u-vela.git/tree/SUPPORTED_OPS.md?h=refs/heads/master#n211),
 so this case isn't handled here either. That said, I don't see why it couldn't 
be added, since a 1D input would just imply a height of 1.

##########
File path: tests/python/contrib/test_ethosu/test_codegen.py
##########
@@ -435,6 +435,110 @@ def representative_dataset():
     infra.verify_source(compiled_models, accel_type)
 
 
+@pytest.mark.parametrize(
+    "accel_type",
+    ACCEL_TYPES,
+)
+@pytest.mark.parametrize(
+    "ifm_shape, axis, keep_dims, use_same_quantization",
+    [
+        # mean to depthwise + multiply
+        [(1, 8, 16, 16), (1, 2), True, False],
+        [(1, 3, 4), (0, 1), True, False],
+        [(1, 65, 2, 1), (1, 2), True, False],  # special case when h > 64
+        # mean to average pool
+        [(1, 8, 16, 16), (2,), False, True],
+        [(3, 3, 4), (0,), True, True],
+        [(8, 5), (0,), False, True],
+        # mean to depthwise
+        [(1, 8, 16, 16), (2,), True, False],
+        [(1, 8, 16, 16), (2, 1), False, False],
+        [(8, 4), (0,), False, False],
+    ],
+)
+def test_mean(accel_type, ifm_shape, axis, keep_dims, use_same_quantization):
+    dtype = "int8"
+
+    def create_mod_from_tflite():
+        class Model(tf.Module):
+            @tf.function
+            def tf_function(self, x):
+                op = tf.math.reduce_mean(x, axis=axis, keepdims=keep_dims)
+                return op
+
+        model = Model()
+        concrete_func = model.tf_function.get_concrete_function(
+            tf.TensorSpec(ifm_shape, dtype=tf.float32)
+        )
+
+        # Convert the model
+        def representative_dataset():
+            for _ in range(100):
+                data = np.random.rand(*tuple(ifm_shape))
+                yield [data.astype(np.float32)]
+
+        converter = 
tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
+        converter.optimizations = [tf.lite.Optimize.DEFAULT]
+        converter.representative_dataset = representative_dataset
+        converter.target_spec.supported_ops = 
[tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+        tflite_graph = converter.convert()
+        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
+
+        mod, _ = relay.frontend.from_tflite(
+            tflite_model,
+            shape_dict={"ifm": ifm_shape},
+            dtype_dict={"ifm": dtype},
+        )
+        input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
+        return mod, input_data, output_data
+
+    def create_mod_from_relay():
+        ifm = relay.var("input", shape=ifm_shape, dtype=dtype)
+        cast = relay.cast(ifm, dtype="int32")
+        mean = relay.mean(cast, axis=axis, keepdims=keep_dims)
+        requantize = relay.qnn.op.requantize(
+            mean,
+            input_scale=relay.const(1.0, dtype="float32"),
+            input_zero_point=relay.const(0, dtype="int32"),
+            output_scale=relay.const(1.0, dtype="float32"),
+            output_zero_point=relay.const(0, dtype="int32"),
+        )
+
+        func = relay.Function(relay.analysis.free_vars(requantize), requantize)
+        mod = tvm.IRModule.from_expr(func)
+
+        input_data = {"input": np.random.randint(low=-127, high=128, 
size=ifm_shape, dtype=dtype)}
+        output_data = generate_ref_data(mod, input_data)
+        return mod, input_data, output_data
+
+    mod, input_data, output_data = (
+        create_mod_from_relay() if use_same_quantization else 
create_mod_from_tflite()
+    )

Review comment:
       Yep, that's correct. One way to get the same QNN params using TFLite is 
to provide a single value in the representative dataset; I couldn't find any 
other way. I thought this was quite hacky, so I settled for constructing the 
graph in Relay.

##########
File path: python/tvm/relay/backend/contrib/ethosu/legalize.py
##########
@@ -831,6 +832,170 @@ def __call__(self, *args, **kwargs):
         pass
 
 
+class MeanRewriter(DFPatternCallback):
+    """Convert ethosu.mean composite functions to to an equivalent 
legalization:
+    - Case 1 (axis == [1, 2] and keepsdims == True):
+        ethosu_depthwise_conv2d + ethosu_binary_elementwise
+    - Case 2 (ifm qparams == ofm qparams): ethosu_pooling
+    - Case 3 (else): ethosu_depthwise_conv2d
+    """
+
+    def __init__(self):
+        super().__init__(require_type=True)
+        self.pattern = (
+            wildcard().has_attr({"Composite": 
ethosu_patterns.MeanParams.composite_name})
+        )(wildcard())
+
+    def callback(
+        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: 
tvm.ir.container.Map
+    ) -> tvm.relay.Expr:
+        params = ethosu_patterns.MeanParams(post.op.body)
+        params.ifm.tensor = post.args[0]
+
+        ifm_shape = params.ifm.shape
+        ofm_shape = params.ofm.shape
+        lut = relay.const([], "int8")
+        axis = params.axis
+        reduced_op = params.ifm.tensor
+
+        # Enforce 4d input
+        if len(ifm_shape) < 4:
+            axis = [x + 1 for x in axis]
+            if len(ifm_shape) == 3:
+                ifm_shape = [1, params.height, params.width, ifm_shape[2]]
+            else:
+                ifm_shape = [1, params.height, params.width, 1]
+            reduced_op = relay.reshape(reduced_op, ifm_shape)
+
+        filter_height = ifm_shape[1] if 1 in axis else 1
+        filter_width = ifm_shape[2] if 2 in axis else 1
+        in_channels = out_channels = ifm_shape[-1]
+
+        # If the height is greater than max kernel height, reshape the input
+        # from [filter_height, filter_width] to [1, 
(filter_height*filter_width)]
+        # only in the case the axis is [1, 2].
+        if axis == [1, 2] and filter_height > 64:
+            ifm_shape = (ifm_shape[0], 1, filter_height * filter_width, 
in_channels)
+            filter_width = filter_height * filter_width
+            filter_height = 1
+            reduced_op = relay.reshape(reduced_op, ifm_shape)
+
+        if axis == [1, 2] and params.keepdims:
+            weight_scale = 1
+            weight_values = np.ones([out_channels, filter_height, 
filter_width, in_channels])
+            scale_bias = vela_api.pack_biases(
+                biases=np.zeros(ifm_shape[-1]),
+                ifm_scale=params.ifm.q_params.scale_f32,
+                ifm_dtype=np.dtype(params.ifm.dtype),
+                weight_scales=np.array([weight_scale], dtype=np.float),
+                ofm_scale=params.ofm.q_params.scale_f32,
+                is_activation_tanh_or_sigmoid=False,
+            )
+
+            reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
+                ifm=reduced_op,
+                weight=relay.const(weight_values, params.ifm.dtype),
+                scale_bias=relay.const(scale_bias, "uint8"),
+                lut=lut,
+                ifm_scale=float(params.ifm.q_params.scale_f32),
+                ifm_zero_point=int(params.ifm.q_params.zero_point),
+                weight_zero_point=0,
+                ofm_scale=float(params.ofm.q_params.scale_f32),
+                ofm_zero_point=int(params.ofm.q_params.zero_point),
+                kernel_shape=(filter_height, filter_width),
+                ofm_channels=out_channels,
+                ofm_dtype="int16",
+            )
+
+            n = int(filter_height * filter_width)
+            eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0
+
+            scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="uint8"), 
dtype="uint8")
+
+            reduced_op = ethosu_ops.ethosu_binary_elementwise(
+                ifm=reduced_op,
+                ifm2=scalar_tensor,
+                lut=lut,
+                operator_type="MUL",
+                ifm_scale=float(params.ofm.q_params.scale_f32),
+                ifm_zero_point=int(params.ofm.q_params.zero_point),
+                ifm2_scale=1 / (n - eps),
+                ifm2_zero_point=0,
+                ofm_scale=float(params.ofm.q_params.scale_f32),
+                ofm_zero_point=int(params.ofm.q_params.zero_point),
+                ifm_channels=out_channels,
+                ifm2_channels=out_channels,
+                reversed_operands=False,
+                ofm_dtype="int8",
+                rounding_mode="NATURAL",
+            )
+        elif (
+            params.ifm.q_params.scale_f32 == params.ofm.q_params.scale_f32
+            and params.ifm.q_params.zero_point == 
params.ofm.q_params.zero_point
+        ):

Review comment:
       Good question. I believe this has something to do with the accuracy of 
the output, although I'm not entirely sure :) The implementation is similar to 
Vela's 
(https://git.mlplatform.org/ml/ethos-u/ethos-u-vela.git/tree/ethosu/vela/tflite_graph_optimiser.py?h=refs/heads/master#n1283).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [tvm] lhutton1 commented on a change in pull request #9576: [microNPU] Mean legalization support

Reply via email to