This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 74a2fa8 [microNPU] Add support for requantize (#9910)
74a2fa8 is described below
commit 74a2fa80464f91987506e3f95d877ad1a2959ba6
Author: lhutton1 <[email protected]>
AuthorDate: Mon Jan 24 00:14:05 2022 +0000
[microNPU] Add support for requantize (#9910)
* [microNPU] Add support for requantize
Adds support for a stand-alone requantize operation, which is legalized to
an identity operation on the NPU.
Change-Id: Ie2450c5fc72f405eddf517593236074aa4716c3b
* fix concatenate tests failing due to not being bit exact
Since requantize is now offloaded, concatenate tests were failing
due to the TFLite reference kernels not being used.
Change-Id: I44b26b5daecfefb776ca19e6646f3690f5570f52
* test multiple requantize offload
Change-Id: I60a3283461a7a7083c05289e84f570698388077b
* address comments
Change-Id: I7196a0fa468eb7c6a96f2b8a68f3a2dcf5a5693c
---
.../tvm/relay/backend/contrib/ethosu/legalize.py | 44 +++++++++
python/tvm/relay/op/contrib/ethosu.py | 59 ++++++++++++
tests/python/contrib/test_ethosu/test_codegen.py | 35 +++++++-
tests/python/contrib/test_ethosu/test_legalize.py | 100 +++++++++++++++++++++
4 files changed, 237 insertions(+), 1 deletion(-)
diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
index d52f3ba..b163740 100644
--- a/python/tvm/relay/backend/contrib/ethosu/legalize.py
+++ b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -1226,6 +1226,49 @@ class LegalizeConcat:
pass
+class RequantizeRewriter(DFPatternCallback):
+ """Convert ethos-u.requantize composite function to an identity operation."""
+
+ def __init__(self):
+ super().__init__(require_type=True)
+ self.pattern = (
+ wildcard().has_attr({"Composite": ethosu_patterns.RequantizeParams.composite_name})
+ )(wildcard())
+
+ def callback(
+ self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
+ ) -> tvm.relay.Expr:
+ params = ethosu_patterns.RequantizeParams(post.op.body)
+ params.ifm.tensor = post.args[0]
+
+ lut = relay.const([], "int8")
+
+ return ethosu_ops.ethosu_identity(
+ ifm=params.ifm.tensor,
+ lut=lut,
+ ifm_scale=float(params.ifm.q_params.scale_f32),
+ ifm_zero_point=int(params.ifm.q_params.zero_point),
+ ofm_scale=float(params.ofm.q_params.scale_f32),
+ ofm_zero_point=int(params.ofm.q_params.zero_point),
+ )
+
+
+@ir.transform.module_pass(opt_level=1)
+class LegalizeRequantize:
+ """This is the pass that wraps RequantizeRewriter."""
+
+ def transform_module(
+ self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+ ) -> tvm.ir.IRModule:
+ for global_var, func in mod.functions.items():
+ func = rewrite(RequantizeRewriter(), func)
+ mod.update_func(global_var, func)
+ return mod
+
+ def __call__(self, *args, **kwargs):
+ pass
+
+
@ir.transform.module_pass(opt_level=1)
class LegalizeEthosU:
"""This is the pass to call graph-rewrites to perform graph transformation
@@ -1255,6 +1298,7 @@ class LegalizeEthosU:
mod = LegalizeMean()(mod)
mod = LegalizeConcat()(mod)
mod = LegalizeSigmoid()(mod)
+ mod = LegalizeRequantize()(mod)
mod = LegalizeReshape()(mod)
mod = LegalizeStridedSlice()(mod)
mod = LegalizeNoOps()(mod)
diff --git a/python/tvm/relay/op/contrib/ethosu.py b/python/tvm/relay/op/contrib/ethosu.py
index 73007cf..9ea1e2b 100644
--- a/python/tvm/relay/op/contrib/ethosu.py
+++ b/python/tvm/relay/op/contrib/ethosu.py
@@ -1145,6 +1145,60 @@ def split_pattern():
return split
+class RequantizeParams:
+ """
+ This class will parse a call to ethos-u.requantize composite function
+ and extract the parameter information.
+ """
+
+ composite_name = "ethos-u.requantize"
+
+ def __init__(self, func_body: Call):
+ from tvm.relay.backend.contrib.ethosu.util import RequantArgs
+
+ layout = "NHWC"
+ in_var = func_body.args[0]
+ requantize = func_body
+
+ self.ifm = TensorParams(
+ in_var,
+ layout=layout,
+ scale=requantize.args[RequantArgs.IFM_SCALE.value],
+ zero_point=requantize.args[RequantArgs.IFM_ZERO_POINT.value],
+ )
+ self.ofm = TensorParams(
+ requantize,
+ layout=layout,
+ scale=requantize.args[RequantArgs.OFM_SCALE.value],
+ zero_point=requantize.args[RequantArgs.OFM_ZERO_POINT.value],
+ )
+
+ attrs = requantize.attrs
+ self.out_dtype = attrs.out_dtype
+
+ def is_valid(self) -> bool:
+ """
+ Checks whether qnn.requantize has compatible attributes with HW.
+ """
+ tensor_params = [self.ifm, self.ofm]
+ if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]):
+ return False
+ if not check_dimensions(self.ifm) or not check_dimensions(self.ofm):
+ return False
+ if self.out_dtype and self.out_dtype != "int8":
+ return False
+ return True
+
+
+def requantize_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
+ """
+ This function creates the pattern for qnn.requantize.
+ """
+ return is_op("qnn.requantize")(
+ wildcard(), is_constant(), is_constant(), is_constant(), is_constant()
+ )
+
+
@register_pattern_table("ethos-u")
def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]]:
return [
@@ -1230,6 +1284,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
split_pattern(),
lambda pat: SplitParams(pat).is_valid(),
),
+ (
+ RequantizeParams.composite_name,
+ requantize_pattern(),
+ lambda pat: RequantizeParams(pat).is_valid(),
+ ),
]
diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
index 4042bb0..1af8a60 100644
--- a/tests/python/contrib/test_ethosu/test_codegen.py
+++ b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -950,7 +950,10 @@ def test_tflite_concat(shapes, axis, accel_type):
op = tf.concat(list(inputs), axis)
return op
- _compare_tvm_with_tflite(concat_func, shapes, accel_type)
+ # TODO(lhutton1) For now output is not bit exact with TFLite.
+ # This is because TFLite reference kernels are not being used.
+ # For this, TFLite will need upgrading to 2.6.
+ _compare_tvm_with_tflite(concat_func, shapes, accel_type, output_tolerance=1)
@pytest.mark.parametrize("accel_type", ACCEL_TYPES)
@@ -987,5 +990,35 @@ def test_tflite_split(accel_type, ifm_shape, num_or_size_splits, axis):
_compare_tvm_with_tflite(split_func, [ifm_shape], accel_type)
+@pytest.mark.parametrize("accel_type", ACCEL_TYPES)
+@pytest.mark.parametrize(
+ "ifm_shape,ifm_scale,ifm_zp,ofm_scale,ofm_zp",
+ [
+ [(1, 8, 8, 3), 1.0, 0, 1.0, 0],
+ [(1, 20, 30, 3), 1.345, 34, 0.32, -23],
+ ],
+)
+def test_ethosu_requantize(accel_type, ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp):
+ dtype = "int8"
+
+ def create_model():
+ ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
+ requantize = relay.qnn.op.requantize(
+ ifm,
+ relay.const(ifm_scale, dtype="float32"),
+ relay.const(ifm_zp, dtype="int32"),
+ relay.const(ofm_scale, dtype="float32"),
+ relay.const(ofm_zp, dtype="int32"),
+ )
+ return tvm.IRModule.from_expr(relay.Function([ifm], requantize))
+
+ cpu_mod = create_model()
+ input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype=dtype)}
+ output_data = generate_ref_data(cpu_mod, input_data)
+ ethosu_mod = partition_for_ethosu(cpu_mod)
+
+ _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
+
+
if __name__ == "__main__":
pytest.main([__file__])
diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py
index 9f97915..f05fec9 100644
--- a/tests/python/contrib/test_ethosu/test_legalize.py
+++ b/tests/python/contrib/test_ethosu/test_legalize.py
@@ -21,6 +21,7 @@ import pytest
pytest.importorskip("ethosu.vela")
import math
+
import numpy as np
import tensorflow as tf
import tflite.Model
@@ -1502,5 +1503,104 @@ def test_tflite_split_v_legalize(ifm_shape, num_or_size_splits, axis):
verify(mod["tvmgen_default_ethos_u_main_0"])
+@pytest.mark.parametrize(
+ "ifm_shape,ifm_scale,ifm_zp,ofm_scale,ofm_zp",
+ [[(1, 8, 8, 3), 1.0, 0, 1.0, 0], [(1, 20, 30, 3), 1.345, 34, 0.32, -23]],
+)
+def test_ethosu_requantize(ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp):
+ dtype = "int8"
+
+ def create_model():
+ ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
+ requantize = relay.qnn.op.requantize(
+ ifm,
+ relay.const(ifm_scale, dtype="float32"),
+ relay.const(ifm_zp, dtype="int32"),
+ relay.const(ofm_scale, dtype="float32"),
+ relay.const(ofm_zp, dtype="int32"),
+ )
+ return tvm.IRModule.from_expr(relay.Function([ifm], requantize))
+
+ def verify(ext_func):
+ op = ext_func.body
+
+ # Check IFM
+ ifm = op.args[0].checked_type
+ assert list(ifm.shape) == list(ifm_shape)
+ assert str(ifm.dtype) == dtype
+
+ # Check OFM
+ ofm = op.checked_type
+ assert list(ofm.shape) == list(ifm_shape)
+ assert str(ofm.dtype) == dtype
+
+ # Check quantization params
+ assert math.isclose(op.attrs.ifm_scale, ifm_scale, abs_tol=1e-7)
+ assert op.attrs.ifm_zero_point == ifm_zp
+ assert math.isclose(op.attrs.ofm_scale, ofm_scale, abs_tol=1e-7)
+ assert op.attrs.ofm_zero_point == ofm_zp
+
+ rewriter = legalize.RequantizeRewriter()
+ pattern_table = [
+ (
+ ethosu.RequantizeParams.composite_name,
+ ethosu.requantize_pattern(),
+ lambda pat: ethosu.RequantizeParams(pat).is_valid(),
+ ),
+ ]
+
+ mod = create_model()
+ mod = partition_ethosu_by_table(mod, pattern_table)
+
+ mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite(
+ rewriter, mod["tvmgen_default_ethos_u_main_0"]
+ )
+ verify(mod["tvmgen_default_ethos_u_main_0"])
+
+
+def test_multiple_requantize_offload():
+ """
+ Testing requantize offload in the case one requantize operation is part of
+ an existing pattern (in this case Mean: cast->mean->requantize) and the
+ other is a stand-alone requantize.
+ """
+
+ def create_model():
+ ifm = relay.var("input", shape=(1, 3, 3, 4), dtype="int8")
+ cast = relay.cast(ifm, dtype="int32")
+ mean = relay.mean(cast, axis=1, keepdims=True)
+ requantize = relay.qnn.op.requantize(
+ mean,
+ input_scale=relay.const(1.0, dtype="float32"),
+ input_zero_point=relay.const(0, dtype="int32"),
+ output_scale=relay.const(1.0, dtype="float32"),
+ output_zero_point=relay.const(0, dtype="int32"),
+ )
+ requantize = relay.qnn.op.requantize(
+ requantize,
+ input_scale=relay.const(1.0, dtype="float32"),
+ input_zero_point=relay.const(0, dtype="int32"),
+ output_scale=relay.const(1.0, dtype="float32"),
+ output_zero_point=relay.const(0, dtype="int32"),
+ )
+ return tvm.IRModule.from_expr(relay.Function([ifm], requantize))
+
+ def verify(ext_func):
+ # If mean operation and separate requantize were offloaded correctly,
+ # there should only be a pooling operation followed by an identity
+ # operation legalized.
+ op = ext_func.body
+ assert op.op.name == "contrib.ethosu.identity"
+ op = op.args[0]
+ assert ext_func.body.args[0].op.name == "contrib.ethosu.pooling"
+ op = op.args[0]
+ assert isinstance(op, relay.Var)
+
+ mod = create_model()
+ mod = ethosu.partition_for_ethosu(mod)
+ mod = legalize.LegalizeEthosU()(mod)
+ verify(mod["tvmgen_default_ethos_u_main_0"])
+
+
if __name__ == "__main__":
pytest.main([__file__])