This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 63592fd71d [quantize] fix bug of annotate for output of add op (#15529)
63592fd71d is described below

commit 63592fd71d854f3848d6ee31180883cedbc776f7
Author: MingkangW <[email protected]>
AuthorDate: Tue Aug 15 03:39:40 2023 +0800

    [quantize] fix bug of annotate for output of add op (#15529)
    
    * Update annotate for output of add op
    
    The type of the add op's output is activation, so it should be annotated with 
QAnnotateKind.ACTIVATION. Otherwise, the graph will cast int32 into int8 directly 
without being quantized when quantizing ResNet.
    
    * Update test_pass_auto_quantize.py
    
    * Update test_pass_auto_quantize.py
    
    * change format of test case
    
    * reformat test case
    
    * reformat quantize test case
    
    * reformat test case
---
 python/tvm/relay/quantize/_annotate.py        |  4 +-
 tests/python/relay/test_pass_auto_quantize.py | 75 +++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/quantize/_annotate.py 
b/python/tvm/relay/quantize/_annotate.py
index c1a7b50d3f..b6d6c921a8 100644
--- a/python/tvm/relay/quantize/_annotate.py
+++ b/python/tvm/relay/quantize/_annotate.py
@@ -276,7 +276,7 @@ def add_rewrite(ref_call, new_args, ctx):
         assert rhs_kind in [QAnnotateKind.INPUT, QAnnotateKind.ACTIVATION]
         lhs_expr = attach_simulated_quantize(lhs_expr, QAnnotateKind.INPUT)
         expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
-        return QAnnotateExpr(expr, QAnnotateKind.INPUT)
+        return QAnnotateExpr(expr, QAnnotateKind.ACTIVATION)
 
     if lhs_kind is not None and rhs_kind is None:
         if _analysis.check_constant(rhs_expr):
@@ -290,7 +290,7 @@ def add_rewrite(ref_call, new_args, ctx):
     if lhs_kind is not None and rhs_kind is not None:
         if lhs_kind == QAnnotateKind.INPUT and rhs_kind == QAnnotateKind.INPUT:
             expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
-            return QAnnotateExpr(expr, QAnnotateKind.INPUT)
+            return QAnnotateExpr(expr, QAnnotateKind.ACTIVATION)
         if lhs_kind == QAnnotateKind.ACTIVATION and rhs_kind == 
QAnnotateKind.ACTIVATION:
             rhs_expr = attach_simulated_quantize(rhs_expr, QAnnotateKind.INPUT)
             expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
diff --git a/tests/python/relay/test_pass_auto_quantize.py 
b/tests/python/relay/test_pass_auto_quantize.py
index 488866ab6f..30d4c36502 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -440,6 +440,78 @@ def test_dense_conv2d_rewrite():
     relay.analysis.post_order_visit(qnn_mod["main"], _check_dense)
 
 
+def test_add_lhs_is_none_annotate():
+    data_conv = relay.var("data_conv", shape=(1, 16, 64, 64))
+    conv2d_w = relay.const(np.random.random((16, 16, 3, 3)))
+    conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1), 
kernel_size=(3, 3))
+    data_add = relay.var("data_add", shape=(16, 1, 1))
+    add = relay.add(data_add, conv2d)
+    global_avg_pool2d = relay.nn.global_avg_pool2d(add)
+    mod = tvm.IRModule.from_expr(global_avg_pool2d)
+
+    calibrate_data = [
+        {"data_conv": np.random.random((1, 16, 64, 64)), "data_add": 
np.random.random((16, 1, 1))}
+    ]
+
+    with tvm.transform.PassContext(opt_level=3):
+        with relay.quantize.qconfig(calibrate_mode="kl_divergence", 
skip_conv_layers=None):
+            qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
+
+    params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in 
mod["main"].params]
+
+    def _eval_mod(mod):
+        return relay.create_executor("vm", device=tvm.cpu(0), target="llvm", 
mod=mod).evaluate()(
+            *params
+        )
+
+    mod_result = _eval_mod(mod)
+    qmod_result = _eval_mod(qmod)
+    tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(), 
rtol=1e-1, atol=1e-1)
+
+
+def test_add_lhs_rhs_is_input_annotate():
+    data_conv_r = relay.var("data_conv_r", shape=(1, 16, 64, 64))
+    conv2d_r = relay.nn.conv2d(
+        data_conv_r,
+        relay.const(np.random.random((16, 16, 3, 3))),
+        padding=(1, 1),
+        kernel_size=(3, 3),
+    )
+    data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
+    conv2d_l = relay.nn.conv2d(
+        data_conv_l,
+        relay.const(np.random.random((16, 16, 3, 3))),
+        padding=(1, 1),
+        kernel_size=(3, 3),
+    )
+    add = relay.add(conv2d_l, conv2d_r)
+    global_avg_pool2d = relay.nn.global_avg_pool2d(add)
+    mod = tvm.IRModule.from_expr(global_avg_pool2d)
+
+    calibrate_data = [
+        {
+            "data_conv_l": np.random.random((1, 16, 64, 64)),
+            "data_conv_r": np.random.random((1, 16, 64, 64)),
+            "data_add": np.random.random((16, 1, 1)),
+        }
+    ]
+
+    with tvm.transform.PassContext(opt_level=3):
+        with relay.quantize.qconfig(calibrate_mode="kl_divergence", 
skip_conv_layers=None):
+            qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
+
+    params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in 
mod["main"].params]
+
+    def _eval_mod(mod):
+        return relay.create_executor("vm", device=tvm.cpu(0), target="llvm", 
mod=mod).evaluate()(
+            *params
+        )
+
+    mod_result = _eval_mod(mod)
+    qmod_result = _eval_mod(qmod)
+    tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(), 
rtol=1e-1, atol=1e-1)
+
+
 if __name__ == "__main__":
     test_mul_rewrite()
     test_batch_flatten_rewrite()
@@ -460,3 +532,6 @@ if __name__ == "__main__":
 
     test_skip_conv()
     test_stop_quantize()
+
+    test_add_lhs_is_none_annotate()
+    test_add_lhs_rhs_is_input_annotate()

Reply via email to