zhaoyang-star commented on code in PR #12116:
URL: https://github.com/apache/tvm/pull/12116#discussion_r925321844


##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const 
Array<Expr>& new_args,
   // by a small alpha value < 1.
   //
   // We assume the same scale and zero point for alpha and the input tensor.
-  // Let T = s(q_t - z) where q_t is the input arg[0]
-  // Then, the quantized value of alpha * T is:
-  // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - 
z) + z
-  // = a * q_t + (1 - a) * z
+  // LeakyReLU can be written in terms of respective quantized tensors, scales 
and
+  // zero points as
   //
-  // We return the quantized value of alpha * T for all values q_t < 
input_zero_point.
-
-  ICHECK_EQ(new_args.size(), 3);
-  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
+  //    scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i)  when Q_i < 
zp_i  (1)
+  //                       Q_o = Q_i when Q_i >= zp_i  (2)
+  //
+  // Since the input qnn params can be different than output qnn params, we 
first requantize the
+  // input tensor to the output qnn params. After requantizing Q_i, equation 
(1) becomes equation
+  // (3) where Q_i' is the requantized data from Q_i.
+  //
+  //    scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o)  when Q_i < 
zp_i (3)
+  //                       Q_o = alpha * Q_i' + (1 - alpha) * zp_o  when Q_i < 
zp_i (4)

Review Comment:
   Done



##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const 
Array<Expr>& new_args,
   // by a small alpha value < 1.
   //
   // We assume the same scale and zero point for alpha and the input tensor.
-  // Let T = s(q_t - z) where q_t is the input arg[0]
-  // Then, the quantized value of alpha * T is:
-  // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - 
z) + z
-  // = a * q_t + (1 - a) * z
+  // LeakyReLU can be written in terms of respective quantized tensors, scales 
and
+  // zero points as
   //
-  // We return the quantized value of alpha * T for all values q_t < 
input_zero_point.
-
-  ICHECK_EQ(new_args.size(), 3);
-  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
+  //    scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i)  when Q_i < 
zp_i  (1)
+  //                       Q_o = Q_i when Q_i >= zp_i  (2)
+  //
+  // Since the input qnn params can be different than output qnn params, we 
first requantize the
+  // input tensor to the output qnn params. After requantizing Q_i, equation 
(1) becomes equation
+  // (3) where Q_i' is the requantized data from Q_i.
+  //
+  //    scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o)  when Q_i < 
zp_i (3)
+  //                       Q_o = alpha * Q_i' + (1 - alpha) * zp_o  when Q_i < 
zp_i (4)
+  ICHECK_EQ(new_args.size(), 5);
+  Expr data = Cast(new_args[0], DataType::Int(32));
+  Expr input_scale = new_args[1];
   Expr input_zero_point = Cast(new_args[2], DataType::Int(32));
+  Expr output_scale = new_args[3];
+  Expr output_zero_point = Cast(new_args[4], DataType::Int(32));
 
   const auto* q_attrs = attrs.as<LeakyReluAttrs>();
   auto alpha = q_attrs->alpha;
 
+  const auto input_shape = get_shape(arg_types[0]);
+  const auto input_dtype = arg_types[0].as<TensorTypeNode>()->dtype;
+
+  // requantize the input to Q_i'
+  auto requantized_expr = RequantizeOrUpcast(data, input_scale, 
input_zero_point, output_scale,
+                                             output_zero_point, input_shape);
+
+  // alpha * Q_i'
   int32_t fixed_point_multiplier, shift;
   std::tie(fixed_point_multiplier, shift) = 
GetFixedPointMultiplierShift(alpha);
-  auto prod = FixedPointMultiply(quantized_data, fixed_point_multiplier, 
shift);
+  auto prod = FixedPointMultiply(requantized_expr, fixed_point_multiplier, 
shift);
 
+  // (1 - alpha) * zp_o
   int32_t fixed_point_multiplier_z, shift_z;
   std::tie(fixed_point_multiplier_z, shift_z) = GetFixedPointMultiplierShift(1 
- alpha);
-  auto scaled_z = FixedPointMultiply(input_zero_point, 
fixed_point_multiplier_z, shift_z);
+  auto scaled_z = FixedPointMultiply(output_zero_point, 
fixed_point_multiplier_z, shift_z);
 
+  // alpha * Q_i' + (1 - alpha) * zp_o

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to