This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 63592fd71d [quantize] fix bug of annotate for output of add op (#15529)
63592fd71d is described below
commit 63592fd71d854f3848d6ee31180883cedbc776f7
Author: MingkangW <[email protected]>
AuthorDate: Tue Aug 15 03:39:40 2023 +0800
[quantize] fix bug of annotate for output of add op (#15529)
* Update annotate for output of add op
The type of the add op's output is activation, so it should be annotated with
QAnnotateKind.ACTIVATION. If it is not, the graph will cast int32 into int8 directly
without quantization when quantizing resnet.
* Update test_pass_auto_quantize.py
* Update test_pass_auto_quantize.py
* change format of test case
* reformat test case
* reformat quantize test case
* reformat test case
---
python/tvm/relay/quantize/_annotate.py | 4 +-
tests/python/relay/test_pass_auto_quantize.py | 75 +++++++++++++++++++++++++++
2 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/python/tvm/relay/quantize/_annotate.py
b/python/tvm/relay/quantize/_annotate.py
index c1a7b50d3f..b6d6c921a8 100644
--- a/python/tvm/relay/quantize/_annotate.py
+++ b/python/tvm/relay/quantize/_annotate.py
@@ -276,7 +276,7 @@ def add_rewrite(ref_call, new_args, ctx):
assert rhs_kind in [QAnnotateKind.INPUT, QAnnotateKind.ACTIVATION]
lhs_expr = attach_simulated_quantize(lhs_expr, QAnnotateKind.INPUT)
expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
- return QAnnotateExpr(expr, QAnnotateKind.INPUT)
+ return QAnnotateExpr(expr, QAnnotateKind.ACTIVATION)
if lhs_kind is not None and rhs_kind is None:
if _analysis.check_constant(rhs_expr):
@@ -290,7 +290,7 @@ def add_rewrite(ref_call, new_args, ctx):
if lhs_kind is not None and rhs_kind is not None:
if lhs_kind == QAnnotateKind.INPUT and rhs_kind == QAnnotateKind.INPUT:
expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
- return QAnnotateExpr(expr, QAnnotateKind.INPUT)
+ return QAnnotateExpr(expr, QAnnotateKind.ACTIVATION)
if lhs_kind == QAnnotateKind.ACTIVATION and rhs_kind ==
QAnnotateKind.ACTIVATION:
rhs_expr = attach_simulated_quantize(rhs_expr, QAnnotateKind.INPUT)
expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
diff --git a/tests/python/relay/test_pass_auto_quantize.py
b/tests/python/relay/test_pass_auto_quantize.py
index 488866ab6f..30d4c36502 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -440,6 +440,78 @@ def test_dense_conv2d_rewrite():
relay.analysis.post_order_visit(qnn_mod["main"], _check_dense)
+def test_add_lhs_is_none_annotate():
+ data_conv = relay.var("data_conv", shape=(1, 16, 64, 64))
+ conv2d_w = relay.const(np.random.random((16, 16, 3, 3)))
+ conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1),
kernel_size=(3, 3))
+ data_add = relay.var("data_add", shape=(16, 1, 1))
+ add = relay.add(data_add, conv2d)
+ global_avg_pool2d = relay.nn.global_avg_pool2d(add)
+ mod = tvm.IRModule.from_expr(global_avg_pool2d)
+
+ calibrate_data = [
+ {"data_conv": np.random.random((1, 16, 64, 64)), "data_add":
np.random.random((16, 1, 1))}
+ ]
+
+ with tvm.transform.PassContext(opt_level=3):
+ with relay.quantize.qconfig(calibrate_mode="kl_divergence",
skip_conv_layers=None):
+ qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
+
+ params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in
mod["main"].params]
+
+ def _eval_mod(mod):
+ return relay.create_executor("vm", device=tvm.cpu(0), target="llvm",
mod=mod).evaluate()(
+ *params
+ )
+
+ mod_result = _eval_mod(mod)
+ qmod_result = _eval_mod(qmod)
+ tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(),
rtol=1e-1, atol=1e-1)
+
+
+def test_add_lhs_rhs_is_input_annotate():
+ data_conv_r = relay.var("data_conv_r", shape=(1, 16, 64, 64))
+ conv2d_r = relay.nn.conv2d(
+ data_conv_r,
+ relay.const(np.random.random((16, 16, 3, 3))),
+ padding=(1, 1),
+ kernel_size=(3, 3),
+ )
+ data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
+ conv2d_l = relay.nn.conv2d(
+ data_conv_l,
+ relay.const(np.random.random((16, 16, 3, 3))),
+ padding=(1, 1),
+ kernel_size=(3, 3),
+ )
+ add = relay.add(conv2d_l, conv2d_r)
+ global_avg_pool2d = relay.nn.global_avg_pool2d(add)
+ mod = tvm.IRModule.from_expr(global_avg_pool2d)
+
+ calibrate_data = [
+ {
+ "data_conv_l": np.random.random((1, 16, 64, 64)),
+ "data_conv_r": np.random.random((1, 16, 64, 64)),
+ "data_add": np.random.random((16, 1, 1)),
+ }
+ ]
+
+ with tvm.transform.PassContext(opt_level=3):
+ with relay.quantize.qconfig(calibrate_mode="kl_divergence",
skip_conv_layers=None):
+ qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
+
+ params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in
mod["main"].params]
+
+ def _eval_mod(mod):
+ return relay.create_executor("vm", device=tvm.cpu(0), target="llvm",
mod=mod).evaluate()(
+ *params
+ )
+
+ mod_result = _eval_mod(mod)
+ qmod_result = _eval_mod(qmod)
+ tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(),
rtol=1e-1, atol=1e-1)
+
+
if __name__ == "__main__":
test_mul_rewrite()
test_batch_flatten_rewrite()
@@ -460,3 +532,6 @@ if __name__ == "__main__":
test_skip_conv()
test_stop_quantize()
+
+ test_add_lhs_is_none_annotate()
+ test_add_lhs_rhs_is_input_annotate()