manupa-arm commented on code in PR #11228:
URL: https://github.com/apache/tvm/pull/11228#discussion_r871395454
##########
include/tvm/relay/transform.h:
##########
@@ -105,9 +105,11 @@ TVM_DLL Pass LazyGradientInit();
/*!
* \brief Fold constant expressions.
*
+ * \param fold_qnn Whether to fold constants for QNN operations.
Review Comment:
I think we need an explanation of why these are not folded by default and when
the "user" of the pass should use this.
##########
tests/python/relay/test_pass_fold_constant.py:
##########
@@ -370,6 +370,178 @@ def before():
tvm.ir.assert_structural_equal(run_infer_type(before_mod["main"]),
after_mod["main"])
+def test_fold_qnn_const():
+ qx = relay.var("x", shape=[2, 3], dtype="int8")
+
+ def before():
+ # Quantized INT8 weights
+ qw = relay.const(np.array([[1, 3, 5], [2, 4, 6]], dtype="int8"),
"int8")
+ op = relay.op.nn.dense(
+ relay.qnn.op.dequantize(qx, relay.const(2.0), relay.const(0)),
+ relay.qnn.op.dequantize(qw, relay.const(2.0), relay.const(0)),
+ )
+ return relay.Function([qx], op)
+
+ def expected():
+ # FP32 weights
+ w = relay.const(np.array([[2.0, 6.0, 10.0], [4.0, 8.0, 12.0]],
dtype="float32"), "float32")
+ op = relay.op.nn.dense(relay.qnn.op.dequantize(qx, relay.const(2.0),
relay.const(0)), w)
+ return relay.Function([qx], op)
+
+ # Nothing changed after applying FoldConstant
+ a = run_opt_pass(before(), transform.FoldConstant())
+ b = run_opt_pass(before(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+ # Fold QNN constants
+ a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
+ b = run_opt_pass(expected(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+
+def test_fold_quantize():
+ t = relay.TensorType([1, 2, 3], "int8")
+
+ def before():
+ data = tvm.nd.array(np.array([1.0, 2.0, 3.0], dtype="float32"))
+ const_fp = relay.const(data, dtype="float32")
+ const_i8 = relay.qnn.op.quantize(
+ const_fp, output_scale=relay.const(0.5),
output_zero_point=relay.const(0)
+ )
+ x = relay.var("x", t)
+ add = relay.op.subtract(x, const_i8)
+ func = relay.Function([x], add)
+ return func
+
+ def expected():
+ data = tvm.nd.array(np.array([2, 4, 6], dtype="int8"))
+ const_i8 = relay.const(data, dtype="int8")
+ x = relay.var("x", t)
+ add = relay.op.subtract(x, const_i8)
+ func = relay.Function([x], add)
+ return func
+
+ # Nothing changed after applying FoldConstant
+ a = run_opt_pass(before(), transform.FoldConstant())
+ b = run_opt_pass(before(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+ # Fold QNN constants
+ a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
+ b = run_opt_pass(expected(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+
+def test_fold_qnn_add():
+ dtype = "uint8"
+
+ def before():
+ add = relay.qnn.op.add(
+ relay.const(np.ones((2, 3), dtype=dtype), dtype=dtype),
+ relay.const(np.ones((2, 3), dtype=dtype), dtype=dtype),
+ relay.const(2.0, dtype="float32"),
+ relay.const(0, dtype="int32"),
+ relay.const(2.0, dtype="float32"),
+ relay.const(0, dtype="int32"),
+ relay.const(1.0, dtype="float32"),
+ relay.const(0, dtype="int32"),
+ )
+ func = relay.Function([], add)
+ return func
+
+ def expected():
+ data = relay.const(np.array([[4, 4, 4], [4, 4, 4]], dtype=dtype),
dtype)
+ func = relay.Function([], data)
+ return func
+
+ # Nothing changed after applying FoldConstant
+ a = run_opt_pass(before(), transform.FoldConstant())
+ b = run_opt_pass(before(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+ # Fold QNN constants
+ a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
+ b = run_opt_pass(expected(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+
+def test_fold_qnn_conv2d_qnn_mul():
Review Comment:
Following up on the conversation, I think we need a test case with
multiple qnn ops where some of them have fully constant arguments while others
don't (which seems to be the main motivation for the change, IIUC).
##########
python/tvm/relay/transform/transform.py:
##########
@@ -261,32 +261,39 @@ def LazyGradientInit():
return _ffi_api.LazyGradientInit()
-def FoldConstantExpr(expr, mod):
+def FoldConstantExpr(expr, mod, fold_qnn=False):
"""Fold the constant expressions in a Relay program.
Parameters
----------
expr: Expr
The expression to fold
mod: IRModule
The module the expr lives in (for global calls)
+ fold_qnn: bool
+ Whether to fold constants for QNN operations.
Returns
-------
new_expr: Expr
The expr after Constant Folding
"""
- return _ffi_api.FoldConstantExpr(expr, mod)
+ return _ffi_api.FoldConstantExpr(expr, mod, fold_qnn)
-def FoldConstant():
+def FoldConstant(fold_qnn=False):
"""Fold the constant expressions in a Relay program.
+ Parameters
+ ----------
+ fold_qnn: bool
Review Comment:
This should also be improved to state the reasoning you mentioned in the PR
for why all constant subgraphs of QNN ops are not folded by default.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]