zhaoyang-star commented on code in PR #12116: URL: https://github.com/apache/tvm/pull/12116#discussion_r925321844
########## src/relay/qnn/op/leaky_relu.cc: ########## @@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args, // by a small alpha value < 1. // // We assume the same scale and zero point for alpha and the input tensor. - // Let T = s(q_t - z) where q_t is the input arg[0] - // Then, the quantized value of alpha * T is: - // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z - // = a * q_t + (1 - a) * z + // LeakyReLU can be written in terms of respective quantized tensors, scales and + // zero points as // - // We return the quantized value of alpha * T for all values q_t < input_zero_point. - - ICHECK_EQ(new_args.size(), 3); - Expr quantized_data = Cast(new_args[0], DataType::Int(32)); + // scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i) when Q_i < zp_i (1) + // Q_o = Q_i when Q_i >= zp_i (2) + // + // Since the input qnn params can be different than output qnn params, we first requantize the + // input tensor to the output qnn params. After requantizing Q_i, equation (1) becomes equation + // (3) where Q_i' is the requantized data from Q_i. + // + // scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o) when Q_i < zp_i (3) + // Q_o = alpha * Q_i' + (1 - alpha) * zp_o when Q_i < zp_i (4) Review Comment: Done ########## src/relay/qnn/op/leaky_relu.cc: ########## @@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args, // by a small alpha value < 1. // // We assume the same scale and zero point for alpha and the input tensor. - // Let T = s(q_t - z) where q_t is the input arg[0] - // Then, the quantized value of alpha * T is: - // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z - // = a * q_t + (1 - a) * z + // LeakyReLU can be written in terms of respective quantized tensors, scales and + // zero points as // - // We return the quantized value of alpha * T for all values q_t < input_zero_point. 
- - ICHECK_EQ(new_args.size(), 3); - Expr quantized_data = Cast(new_args[0], DataType::Int(32)); + // scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i) when Q_i < zp_i (1) + // Q_o = Q_i when Q_i >= zp_i (2) + // + // Since the input qnn params can be different than output qnn params, we first requantize the + // input tensor to the output qnn params. After requantizing Q_i, equation (1) becomes equation + // (3) where Q_i' is the requantized data from Q_i. + // + // scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o) when Q_i < zp_i (3) + // Q_o = alpha * Q_i' + (1 - alpha) * zp_o when Q_i < zp_i (4) + ICHECK_EQ(new_args.size(), 5); + Expr data = Cast(new_args[0], DataType::Int(32)); + Expr input_scale = new_args[1]; Expr input_zero_point = Cast(new_args[2], DataType::Int(32)); + Expr output_scale = new_args[3]; + Expr output_zero_point = Cast(new_args[4], DataType::Int(32)); const auto* q_attrs = attrs.as<LeakyReluAttrs>(); auto alpha = q_attrs->alpha; + const auto input_shape = get_shape(arg_types[0]); + const auto input_dtype = arg_types[0].as<TensorTypeNode>()->dtype; + + // requantize the input to Q_i' + auto requantized_expr = RequantizeOrUpcast(data, input_scale, input_zero_point, output_scale, + output_zero_point, input_shape); + + // alpha * Q_i' int32_t fixed_point_multiplier, shift; std::tie(fixed_point_multiplier, shift) = GetFixedPointMultiplierShift(alpha); - auto prod = FixedPointMultiply(quantized_data, fixed_point_multiplier, shift); + auto prod = FixedPointMultiply(requantized_expr, fixed_point_multiplier, shift); + // (1 - alpha) * zp_o int32_t fixed_point_multiplier_z, shift_z; std::tie(fixed_point_multiplier_z, shift_z) = GetFixedPointMultiplierShift(1 - alpha); - auto scaled_z = FixedPointMultiply(input_zero_point, fixed_point_multiplier_z, shift_z); + auto scaled_z = FixedPointMultiply(output_zero_point, fixed_point_multiplier_z, shift_z); + // alpha * Q_i' + (1 - alpha) * zp_o Review Comment: Done -- This is an automated message 
from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
