manupa-arm commented on code in PR #11228:
URL: https://github.com/apache/tvm/pull/11228#discussion_r871395454
##########
include/tvm/relay/transform.h:
##########
@@ -105,9 +105,11 @@ TVM_DLL Pass LazyGradientInit();
/*!
* \brief Fold constant expressions.
*
+ * \param fold_qnn Whether to fold constants for QNN operations.
Review Comment:
I think we need an explanation of why these are not folded by default and when
the "user" of the pass should use this.
##########
tests/python/relay/test_pass_fold_constant.py:
##########
@@ -370,6 +370,178 @@ def before():
tvm.ir.assert_structural_equal(run_infer_type(before_mod["main"]),
after_mod["main"])
+def test_fold_qnn_const():
+ qx = relay.var("x", shape=[2, 3], dtype="int8")
+
+ def before():
+ # Quantized INT8 weights
+ qw = relay.const(np.array([[1, 3, 5], [2, 4, 6]], dtype="int8"),
"int8")
+ op = relay.op.nn.dense(
+ relay.qnn.op.dequantize(qx, relay.const(2.0), relay.const(0)),
+ relay.qnn.op.dequantize(qw, relay.const(2.0), relay.const(0)),
+ )
+ return relay.Function([qx], op)
+
+ def expected():
+ # FP32 weights
+ w = relay.const(np.array([[2.0, 6.0, 10.0], [4.0, 8.0, 12.0]],
dtype="float32"), "float32")
+ op = relay.op.nn.dense(relay.qnn.op.dequantize(qx, relay.const(2.0),
relay.const(0)), w)
+ return relay.Function([qx], op)
+
+ # Nothing changed after applying FoldConstant
+ a = run_opt_pass(before(), transform.FoldConstant())
+ b = run_opt_pass(before(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+ # Fold QNN constants
+ a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
+ b = run_opt_pass(expected(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+
+def test_fold_quantize():
+ t = relay.TensorType([1, 2, 3], "int8")
+
+ def before():
+ data = tvm.nd.array(np.array([1.0, 2.0, 3.0], dtype="float32"))
+ const_fp = relay.const(data, dtype="float32")
+ const_i8 = relay.qnn.op.quantize(
+ const_fp, output_scale=relay.const(0.5),
output_zero_point=relay.const(0)
+ )
+ x = relay.var("x", t)
+ add = relay.op.subtract(x, const_i8)
+ func = relay.Function([x], add)
+ return func
+
+ def expected():
+ data = tvm.nd.array(np.array([2, 4, 6], dtype="int8"))
+ const_i8 = relay.const(data, dtype="int8")
+ x = relay.var("x", t)
+ add = relay.op.subtract(x, const_i8)
+ func = relay.Function([x], add)
+ return func
+
+ # Nothing changed after applying FoldConstant
+ a = run_opt_pass(before(), transform.FoldConstant())
+ b = run_opt_pass(before(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+ # Fold QNN constants
+ a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
+ b = run_opt_pass(expected(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+
+def test_fold_qnn_add():
+ dtype = "uint8"
+
+ def before():
+ add = relay.qnn.op.add(
+ relay.const(np.ones((2, 3), dtype=dtype), dtype=dtype),
+ relay.const(np.ones((2, 3), dtype=dtype), dtype=dtype),
+ relay.const(2.0, dtype="float32"),
+ relay.const(0, dtype="int32"),
+ relay.const(2.0, dtype="float32"),
+ relay.const(0, dtype="int32"),
+ relay.const(1.0, dtype="float32"),
+ relay.const(0, dtype="int32"),
+ )
+ func = relay.Function([], add)
+ return func
+
+ def expected():
+ data = relay.const(np.array([[4, 4, 4], [4, 4, 4]], dtype=dtype),
dtype)
+ func = relay.Function([], data)
+ return func
+
+ # Nothing changed after applying FoldConstant
+ a = run_opt_pass(before(), transform.FoldConstant())
+ b = run_opt_pass(before(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+ # Fold QNN constants
+ a = run_opt_pass(before(), transform.FoldConstant(fold_qnn=True))
+ b = run_opt_pass(expected(), transform.InferType())
+ tvm.ir.assert_structural_equal(a, b)
+
+
+def test_fold_qnn_conv2d_qnn_mul():
Review Comment:
Following up on the conversation, I think we need a test case with
multiple qnn ops where some of them have fully constant arguments while others
don't (which seems to be the main motivation for the change, IIUC).
##########
python/tvm/relay/transform/transform.py:
##########
@@ -261,32 +261,39 @@ def LazyGradientInit():
return _ffi_api.LazyGradientInit()
-def FoldConstantExpr(expr, mod):
+def FoldConstantExpr(expr, mod, fold_qnn=False):
"""Fold the constant expressions in a Relay program.
Parameters
----------
expr: Expr
The expression to fold
mod: IRModule
The module the expr lives in (for global calls)
+ fold_qnn: bool
+ Whether to fold constants for QNN operations.
Returns
-------
new_expr: Expr
The expr after Constant Folding
"""
- return _ffi_api.FoldConstantExpr(expr, mod)
+ return _ffi_api.FoldConstantExpr(expr, mod, fold_qnn)
-def FoldConstant():
+def FoldConstant(fold_qnn=False):
"""Fold the constant expressions in a Relay program.
+ Parameters
+ ----------
+ fold_qnn: bool
Review Comment:
This should also be improved to state the reasoning you mentioned in the PR
for why all constant subgraphs of QNN ops are not folded by default.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]