manupa-arm commented on a change in pull request #9531:
URL: https://github.com/apache/tvm/pull/9531#discussion_r763209660



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +220,197 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay 
partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, 
kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, 
%req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are 
filled into call_extern
+    // 
https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = 
GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = 
GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = 
tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), 
ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    Array<PrimExpr> input_shape = 
fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    tvm::Array<PrimExpr> call_ext_args = 
{tir::StringImm("arm_fully_connected_s8"), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = 
{tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);

Review comment:
       [Clarity] I think we can bring the declaration "bias_shape" closer to 
here as it is not used before this

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -194,7 +180,7 @@ class RelayToTIRVisitor : public MixedModeMutator {
     if (depth_multiplier != -1) {
       cmsisnn_api = "arm_depthwise_conv_wrapper_s8";
       Array<PrimExpr> depthwise_filter_shape{1, filter_shape[0], 
filter_shape[1], out_channels};

Review comment:
       Is it possible to use something like
   ```
   int kernel_pos_o = kernel_layout.find("O");
   ```
   instead of the numbers?

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +220,197 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay 
partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, 
kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, 
%req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are 
filled into call_extern
+    // 
https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = 
GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = 
GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = 
tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), 
ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    Array<PrimExpr> input_shape = 
fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    tvm::Array<PrimExpr> call_ext_args = 
{tir::StringImm("arm_fully_connected_s8"), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = 
{tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);

Review comment:
       [Clarity] I think we can bring the declaration "cmsisnn_output_shape" 
closer to here as it is not used before this

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -164,21 +156,15 @@ class RelayToTIRVisitor : public MixedModeMutator {
                                         ToArg(dilation_w),   
ToArg(dilation_h),    ToArg(clip_min),
                                         ToArg(clip_max)};
 
-    // cmsis_nn_dims *input_dims (NHWC)
+    // layout NHWC

Review comment:
       Sorry Ashutosh, I still don't follow what this comment means. How is it
related to the following line?

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -164,21 +156,15 @@ class RelayToTIRVisitor : public MixedModeMutator {
                                         ToArg(dilation_w),   
ToArg(dilation_h),    ToArg(clip_min),
                                         ToArg(clip_max)};
 
-    // cmsis_nn_dims *input_dims (NHWC)
+    // layout NHWC
     Array<PrimExpr> input_shape = 
conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
-    Array<PrimExpr> input_dims = CMSISNNDimensions(input_shape);
 
-    // cmsis_nn_dims *filter_dims (OHWI for Conv2D and IHWO for depthwise)
+    // OHWI for Conv2D and IHWO for depthwise

Review comment:
       Same here: where is the OHWI information used?

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +220,197 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay 
partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, 
kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, 
%req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are 
filled into call_extern
+    // 
https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = 
GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = 
GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = 
tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), 
ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    Array<PrimExpr> input_shape = 
fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    tvm::Array<PrimExpr> call_ext_args = 
{tir::StringImm("arm_fully_connected_s8"), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = 
{tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);

Review comment:
       [Clarity] I think we can bring the declaration "cmsisnn_input_shape"
closer to here as it is not used before this.

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +220,197 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay 
partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, 
kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, 
%req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are 
filled into call_extern
+    // 
https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = 
GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = 
GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = 
tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), 
ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    Array<PrimExpr> input_shape = 
fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    tvm::Array<PrimExpr> call_ext_args = 
{tir::StringImm("arm_fully_connected_s8"), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = 
{tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);

Review comment:
       [Clarity] I think we can bring the declaration "cmsisnn_filter_shape" 
closer to here as it is not used before this




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to