Alex-grovety commented on issue #8717:
URL: https://github.com/apache/tvm/issues/8717#issuecomment-915247652


   1.) TIR PrimFunc for the first example:
   ```
   PrimFunc([placeholder, placeholder, placeholder, T_cast]) 
attrs={"from_legacy_te_schedule": (bool)1, "global_symbol": 
"tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_21", 
"tir.noalias": (bool)1} {
     allocate DepthwiseConv2d[int32 * 401408], storage_scope = global
     for (i, 0, 56) {
       for (j, 0, 56) {
         for (c, 0, 128) {
           DepthwiseConv2d[(((i*7168) + (j*128)) + c)] = 0
           for (di, 0, 3) {
             for (dj, 0, 3) {
               DepthwiseConv2d[(((i*7168) + (j*128)) + c)] = 
(DepthwiseConv2d[(((i*7168) + (j*128)) + c)] + (int32(tir.if_then_else(((((1 <= 
(i + di)) && ((i + di) < 57)) && (1 <= (j + dj))) && ((j + dj) < 57)), 
placeholder[((((((i*7168) + (di*7168)) + (j*128)) + (dj*128)) + c) - 7296)], 
(int16)0))*int32(placeholder[(((di*384) + (dj*128)) + c)])))
             }
           }
         }
       }
     }
     for (ax0.ax1.fused.ax2.fused.ax3.fused, 0, 401408) {
       T_cast[ax0.ax1.fused.ax2.fused.ax3.fused] = 
int16(uint8(max(min(tir.q_multiply_shift((DepthwiseConv2d[ax0.ax1.fused.ax2.fused.ax3.fused]
 + placeholder[floormod(ax0.ax1.fused.ax2.fused.ax3.fused, 128)]), 2080045879, 
31, -4), 255), 0)))
     }
   }
   ```
   TIR PrimFunc for the second example:
   ```
   PrimFunc([placeholder, placeholder, placeholder, placeholder, T_cast]) 
attrs={"from_legacy_te_schedule": (bool)1, "global_symbol": 
"tvmgen_default_fused_nn_conv2d_subtract_add_fixed_point_multiply_clip_cast_10",
 "tir.noalias": (bool)1} {
     allocate PaddedInput[uint8 * 430592], storage_scope = global
     allocate DepthwiseConv2d[int32 * 401408], storage_scope = global
     for (i1, 0, 58) {
       for (i2, 0, 58) {
         for (i3, 0, 128) {
           PaddedInput[(((i1*7424) + (i2*128)) + i3)] = placeholder[(((i1*7424) 
+ (i2*128)) + i3)]
         }
       }
     }
     for (i, 0, 56) {
       for (j, 0, 56) {
         for (c, 0, 128) {
           DepthwiseConv2d[(((i*7168) + (j*128)) + c)] = 0
           for (di, 0, 3) {
             for (dj, 0, 3) {
               DepthwiseConv2d[(((i*7168) + (j*128)) + c)] = 
(DepthwiseConv2d[(((i*7168) + (j*128)) + c)] + (int32(PaddedInput[(((((i*7424) 
+ (di*7424)) + (j*128)) + (dj*128)) + c)])*int32(placeholder[(((di*384) + 
(dj*128)) + c)])))
             }
           }
         }
       }
     }
     for (ax1, 0, 56) {
       for (ax2, 0, 56) {
         for (ax3, 0, 128) {
           DepthwiseConv2d[(((ax1*7168) + (ax2*128)) + ax3)] = 
(DepthwiseConv2d[(((ax1*7168) + (ax2*128)) + ax3)] - placeholder[(((ax1*7168) + 
(ax2*128)) + ax3)])
         }
       }
     }
     for (ax1, 0, 56) {
       for (ax2, 0, 56) {
         for (ax3, 0, 128) {
           DepthwiseConv2d[(((ax1*7168) + (ax2*128)) + ax3)] = 
(DepthwiseConv2d[(((ax1*7168) + (ax2*128)) + ax3)] + placeholder[ax3])
         }
       }
     }
     for (i1, 0, 56) {
       for (i2, 0, 56) {
         for (i3, 0, 128) {
           DepthwiseConv2d[(((i1*7168) + (i2*128)) + i3)] = 
tir.q_multiply_shift(DepthwiseConv2d[(((i1*7168) + (i2*128)) + i3)], 
2080045879, 31, -4)
         }
       }
     }
     for (i1, 0, 56) {
       for (i2, 0, 56) {
         for (i3, 0, 128) {
           DepthwiseConv2d[(((i1*7168) + (i2*128)) + i3)] = 
max(min(DepthwiseConv2d[(((i1*7168) + (i2*128)) + i3)], 255), 0)
         }
       }
     }
     for (ax1, 0, 56) {
       for (ax2, 0, 56) {
         for (ax3, 0, 128) {
           T_cast[(((ax1*7168) + (ax2*128)) + ax3)] = 
uint8(DepthwiseConv2d[(((ax1*7168) + (ax2*128)) + ax3)])
         }
       }
     }
   }
   ```
   
   3.) For the second example with fusion, we get (from default_lib1.c for 
test_quant_mobilenet_tfl):
   ```
   TVM_DLL int32_t 
tvmgen_default_fused_nn_conv2d_subtract_add_fixed_point_multiply_clip_cast_10(void*
 args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* 
out_ret_tcode, void* resource_handle) {
     void* arg0 = (((TVMValue*)args)[0].v_handle);
     int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)];
     void* arg1 = (((TVMValue*)args)[1].v_handle);
     int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)];
     void* arg2 = (((TVMValue*)args)[2].v_handle);
     int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)];
     void* arg3 = (((TVMValue*)args)[3].v_handle);
     int32_t arg3_code = ((int32_t*)arg_type_ids)[(3)];
     void* arg4 = (((TVMValue*)args)[4].v_handle);
     int32_t arg4_code = ((int32_t*)arg_type_ids)[(4)];
     void* placeholder = (((DLTensor*)arg0)[0].data);
     void* arg0_shape = (((DLTensor*)arg0)[0].shape);
     void* arg0_strides = (((DLTensor*)arg0)[0].strides);
     int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id);
     void* placeholder1 = (((DLTensor*)arg1)[0].data);
     void* arg1_shape = (((DLTensor*)arg1)[0].shape);
     void* arg1_strides = (((DLTensor*)arg1)[0].strides);
     void* placeholder2 = (((DLTensor*)arg2)[0].data);
     void* arg2_shape = (((DLTensor*)arg2)[0].shape);
     void* arg2_strides = (((DLTensor*)arg2)[0].strides);
     void* placeholder3 = (((DLTensor*)arg3)[0].data);
     void* arg3_shape = (((DLTensor*)arg3)[0].shape);
     void* arg3_strides = (((DLTensor*)arg3)[0].strides);
     void* T_cast = (((DLTensor*)arg4)[0].data);
     void* arg4_shape = (((DLTensor*)arg4)[0].shape);
     void* arg4_strides = (((DLTensor*)arg4)[0].strides);
     if (!(arg0_strides == NULL)) {
     }
     if (!(arg1_strides == NULL)) {
     }
     if (!(arg2_strides == NULL)) {
     }
     if (!(arg3_strides == NULL)) {
     }
     if (!(arg4_strides == NULL)) {
     }
     void* DepthwiseConv2d = TVMBackendAllocWorkspace(1, dev_id, 
(uint64_t)1605632, 0, 32);
     if (DepthwiseConv2d == NULL) {
       return -1;
     }
     for (int32_t i = 0; i < 56; ++i) {
       for (int32_t j = 0; j < 56; ++j) {
         for (int32_t c = 0; c < 128; ++c) {
           ((int32_t*)DepthwiseConv2d)[((((i * 7168) + (j * 128)) + c))] = 0;
           for (int32_t di = 0; di < 3; ++di) {
             for (int32_t dj = 0; dj < 3; ++dj) {
               ((int32_t*)DepthwiseConv2d)[((((i * 7168) + (j * 128)) + c))] = 
(((int32_t*)DepthwiseConv2d)[((((i * 7168) + (j * 128)) + c))] + 
(((int32_t)((uint8_t*)placeholder)[((((((i * 7424) + (di * 7424)) + (j * 128)) 
+ (dj * 128)) + c))]) * ((int32_t)((int8_t*)placeholder1)[((((di * 384) + (dj * 
128)) + c))])));
             }
           }
         }
       }
     }
     for (int32_t ax0_ax1_fused_ax2_fused_ax3_fused = 0; 
ax0_ax1_fused_ax2_fused_ax3_fused < 401408; 
++ax0_ax1_fused_ax2_fused_ax3_fused) {
       int32_t _1 = (int32_t)(((((0 != 0) ? 
(((int64_t)((((int32_t*)DepthwiseConv2d)[(ax0_ax1_fused_ax2_fused_ax3_fused)] + 
((int32_t*)placeholder3)[((ax0_ax1_fused_ax2_fused_ax3_fused & 127))]) - 
((int32_t*)placeholder2)[(ax0_ax1_fused_ax2_fused_ax3_fused)])) << 
((int64_t)0)) : 
((int64_t)((((int32_t*)DepthwiseConv2d)[(ax0_ax1_fused_ax2_fused_ax3_fused)] + 
((int32_t*)placeholder3)[((ax0_ax1_fused_ax2_fused_ax3_fused & 127))]) - 
((int32_t*)placeholder2)[(ax0_ax1_fused_ax2_fused_ax3_fused)]))) * 
(int64_t)2080045879) + ((int64_t)1 << ((int64_t)((4 + 31) - 1)))) >> 
((int64_t)(4 + 31)));
       int32_t _2 = (_1) < (255) ? (_1) : (255);
       ((uint8_t*)T_cast)[(ax0_ax1_fused_ax2_fused_ax3_fused)] = 
((uint8_t)((_2) > (0) ? (_2) : (0)));
     }
     if (TVMBackendFreeWorkspace(1, dev_id, DepthwiseConv2d) != 0) {
       return -1;
     }
     return 0;
   }
   ```
   TIR PrimFunc for the second example with fusion:
   ```
   PrimFunc([placeholder, placeholder, placeholder, placeholder, T_cast]) 
attrs={"from_legacy_te_schedule": (bool)1, "global_symbol": 
"tvmgen_default_fused_nn_conv2d_subtract_add_fixed_point_multiply_clip_cast_10",
 "tir.noalias": (bool)1} {
     allocate DepthwiseConv2d[int32 * 401408], storage_scope = global
     for (i, 0, 56) {
       for (j, 0, 56) {
         for (c, 0, 128) {
           DepthwiseConv2d[(((i*7168) + (j*128)) + c)] = 0
           for (di, 0, 3) {
             for (dj, 0, 3) {
               DepthwiseConv2d[(((i*7168) + (j*128)) + c)] = 
(DepthwiseConv2d[(((i*7168) + (j*128)) + c)] + (int32(placeholder[(((((i*7424) 
+ (di*7424)) + (j*128)) + (dj*128)) + c)])*int32(placeholder[(((di*384) + 
(dj*128)) + c)])))
             }
           }
         }
       }
     }
     for (ax0.ax1.fused.ax2.fused.ax3.fused, 0, 401408) {
       T_cast[ax0.ax1.fused.ax2.fused.ax3.fused] = 
uint8(max(min(tir.q_multiply_shift(((DepthwiseConv2d[ax0.ax1.fused.ax2.fused.ax3.fused]
 + placeholder[floormod(ax0.ax1.fused.ax2.fused.ax3.fused, 128)]) - 
placeholder[ax0.ax1.fused.ax2.fused.ax3.fused]), 2080045879, 31, -4), 255), 0))
     }
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to