gemini-code-assist[bot] commented on code in PR #19538:
URL: https://github.com/apache/tvm/pull/19538#discussion_r3221254510
##########
python/tvm/relax/frontend/tflite/tflite_frontend.py:
##########
@@ -2637,27 +2545,27 @@ def convert_fully_connected(self, op):
dtype=bias_tensor_type_str,
source_name=bias_tensor.tensor.Name(),
)
+ if bias_tensor.qnn_params:
+ bias_expr = self.dequantize(bias_expr, bias_tensor)
+ elif input_tensor.qnn_params and bias_tensor_type in (
+ TensorType.INT32,
+ TensorType.INT64,
+ ):
+ bias_scale_val = (
+
get_scalar_from_constant(input_tensor.qnn_params["scale"])
+ *
get_scalar_from_constant(weight_tensor.qnn_params["scale"])
+ )
+ bias_expr = relax.op.dequantize(
+ bias_expr,
+ scale=relax.const(bias_scale_val, "float32"),
+ zero_point=relax.const(0, "int32"),
+ axis=0,
+ )
Review Comment:

Using `get_scalar_from_constant` on `weight_tensor.qnn_params["scale"]` will
cause an `AssertionError` for per-channel quantized models because the scale is
a constant tensor, not a scalar. To support both per-tensor and per-channel
quantization robustly, you should use `relax.op.multiply` on the scale
expressions and pass the resulting expression directly to `relax.op.dequantize`.
```suggestion
elif input_tensor.qnn_params and bias_tensor_type in (
TensorType.INT32,
TensorType.INT64,
):
bias_scale = relax.op.multiply(
input_tensor.qnn_params["scale"],
weight_tensor.qnn_params["scale"],
)
bias_expr = relax.op.dequantize(
bias_expr,
scale=bias_scale,
zero_point=relax.const(0, "int32"),
axis=0,
)
```
##########
python/tvm/relax/frontend/tflite/tflite_frontend.py:
##########
@@ -2900,37 +2823,32 @@ def convert_conv(self, op, conv_type):
dtype=bias_tensor_type_str,
source_name=bias_tensor.tensor.Name(),
)
+ # For quantized conv, INT32/INT64 bias must be dequantized
+ # to float32 before adding to the float conv output.
+ if bias_tensor.qnn_params:
+ bias_expr = self.dequantize(bias_expr, bias_tensor)
+ elif input_tensor.qnn_params and bias_tensor_type in (
+ TensorType.INT32,
+ TensorType.INT64,
+ ):
+ bias_expr = relax.op.dequantize(
+ bias_expr,
+ scale=relax.const(
+
get_scalar_from_constant(input_tensor.qnn_params["scale"])
+ *
get_scalar_from_constant(weight_tensor.qnn_params["scale"]),
+ "float32",
+ ),
+ zero_point=relax.const(0, "int32"),
+ axis=0,
+ )
Review Comment:

The use of `get_scalar_from_constant` here will crash when importing
per-channel quantized Conv2D models. Since `relax.op.dequantize` accepts an
expression for the `scale` parameter, you can use `relax.op.multiply` to
compute the implicit bias scale (`input_scale * weight_scale`) as a Relax
expression. This handles both scalar and tensor scales correctly.
```python
bias_expr = relax.op.dequantize(
bias_expr,
scale=relax.op.multiply(
input_tensor.qnn_params["scale"],
weight_tensor.qnn_params["scale"],
),
zero_point=relax.const(0, "int32"),
axis=0,
)
```
##########
python/tvm/relax/frontend/tflite/tflite_frontend.py:
##########
@@ -4461,34 +4375,26 @@ def convert_transpose_conv(self, op):
dtype=bias_tensor_type_str,
source_name=bias_tensor.tensor.Name(),
)
- channel_axis = 3
- out = relax.op.nn.bias_add(out, bias_expr, axis=channel_axis)
+ if bias_tensor.qnn_params:
+ bias_expr = self.dequantize(bias_expr, bias_tensor)
+ elif input_tensor.qnn_params and bias_tensor_type in (
+ TensorType.INT32,
+ TensorType.INT64,
+ ):
+ bias_scale_val = (
+ get_scalar_from_constant(input_tensor.qnn_params["scale"])
+ *
get_scalar_from_constant(weights_tensor.qnn_params["scale"])
+ )
+ bias_expr = relax.op.dequantize(
+ bias_expr,
+ scale=relax.const(bias_scale_val, "float32"),
+ zero_point=relax.const(0, "int32"),
+ axis=0,
+ )
Review Comment:

This `get_scalar_from_constant` call will crash on per-channel quantized
TransposeConv models. Use `relax.op.multiply` to combine the input and weight
scales into a single expression for the bias dequantization.
```python
bias_expr = relax.op.dequantize(
bias_expr,
scale=relax.op.multiply(
input_tensor.qnn_params["scale"],
weights_tensor.qnn_params["scale"],
),
zero_point=relax.const(0, "int32"),
axis=0,
)
```
##########
python/tvm/relax/frontend/tflite/tflite_frontend.py:
##########
@@ -4411,25 +4329,21 @@ def convert_transpose_conv(self, op):
padding = (0, 0, 0, 0)
if input_tensor.qnn_params:
- input_zero_point = input_tensor.qnn_params["zero_point"]
- kernel_zero_point = weights_tensor.qnn_params["zero_point"]
- input_scale = input_tensor.qnn_params["scale"]
- kernel_scale = weights_tensor.qnn_params["scale"]
- out_dtype = "int64" if output_tensor_type_str == "int16" else
"int32"
- out = _qnn.op.conv2d_transpose(
- in_expr,
+ in_f32 = self.dequantize(in_expr, input_tensor)
Review Comment:

For TransposeConv, the code assumes the quantization axis is 0 (OC) and
remaps it to 1 for the `IOHW` layout. Please add a validation check to ensure
`weights_tensor.qnn_params["axis"] == 0` to avoid silent errors with
non-standard models.
```python
if input_tensor.qnn_params:
if weights_tensor.qnn_params["axis"] != 0:
raise tvm.error.OpNotImplemented(
f"Per-channel quantized transpose convolution is only
supported when the "
f"quantization axis is 0 (OC), but got
{weights_tensor.qnn_params['axis']}."
)
```
##########
python/tvm/relax/frontend/tflite/tflite_frontend.py:
##########
@@ -2869,15 +2777,30 @@ def convert_conv(self, op, conv_type):
)
if input_tensor.qnn_params:
- qnn_conv2d_params = dict(params)
- qnn_conv2d_params["input_zero_point"] =
input_tensor.qnn_params["zero_point"]
- qnn_conv2d_params["kernel_zero_point"] =
weight_tensor.qnn_params["zero_point"]
- qnn_conv2d_params["out_dtype"] = (
- "int64" if output_tensor_type_str == "int16" else "int32"
+ # Dequantize input activation
+ in_f32 = self.dequantize(in_expr, input_tensor)
+ # Dequantize weight with per-channel axis remap.
+ # TFLite weight original layout: [OC, KH, KW, IC]
+ # After transpose to HWIO: [KH, KW, IC, OC]
+ # QuantizedDimension() == 0 (OC in original) → axis 3 in HWIO.
+ weight_axis = weight_tensor.qnn_params["axis"]
+ if is_depthwise_conv:
+ if weight_axis != 0:
+ raise tvm.error.OpNotImplemented(
+ "Per-channel quantized depthwise convolution is not
supported "
+ "because the channel axis changes semantics after the "
+ "[1,KH,KW,C*M] → [KH,KW,C,M] reshape."
+ )
+ else:
+ weight_axis = 3
Review Comment:

Similar to the FullyConnected case, the code silently assumes the
quantization axis for regular Conv2D weights is 0 (OC) and remaps it to 3 for
the `HWIO` layout. It is better to explicitly validate that `weight_axis == 0`
before overwriting it, consistent with the check performed for depthwise
convolution above.
```python
else:
if weight_axis != 0:
raise tvm.error.OpNotImplemented(
f"Per-channel quantized convolution is only
supported when the "
f"quantization axis is 0 (OC), but got
{weight_axis}."
)
weight_axis = 3
```
##########
python/tvm/relax/frontend/tflite/tflite_frontend.py:
##########
@@ -2600,20 +2510,18 @@ def convert_fully_connected(self, op):
)
weight_expr = self.get_tensor_expr(weight_tensor)
- weight_shape = weight_expr.struct_info.shape
weight_expr = relax.op.permute_dims(weight_expr, [1, 0])
if input_tensor.qnn_params:
- out = _qnn.op.dense(
- in_expr,
+ # Dequantize input and weight (OC remapped from axis 0 to 1)
Review Comment:

The code assumes that the quantization axis for FullyConnected weights is
always 0 (OC) before remapping it to 1 after the `[OC, IC] -> [IC, OC]`
permutation. While this is the standard for TFLite, it is safer to validate
that `weight_tensor.qnn_params["axis"]` is indeed 0 before proceeding, as
ignoring it could lead to incorrect results if a non-standard model is imported.
```python
if input_tensor.qnn_params:
if weight_tensor.qnn_params["axis"] != 0:
raise tvm.error.OpNotImplemented(
f"Per-channel quantized fully connected is only
supported when the "
f"quantization axis is 0 (OC), but got
{weight_tensor.qnn_params['axis']}."
)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]