JosseVanDelm opened a new issue, #12381:
URL: https://github.com/apache/tvm/issues/12381
Hi, there seems to be a mismatch between the behavior of the Relay code and
the TOPI-generated code when it comes to average pooling.
TVM's generated output buffer is typed as 32 bits (`int32_t`) even though it
should be 8 bits (`int8_t`).
This is a problem since the code following this kernel call will interpret
the 32-bit values as an 8-bit buffer.
Manually changing the generated code from the actual to the expected behavior solves the issue.
I cannot find a way to set the output datatype or work around this issue.
This issue is not present in the max pooling operator.
### Expected behavior
```c
TVM_DLL int32_t tvmgen_default_fused_nn_avg_pool2d(int8_t* placeholder,
int8_t* tensor) {
int8_t tensor1[64];
for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 16;
++ax0_ax1_fused_ax2_fused) {
for (int32_t ax4_init = 0; ax4_init < 4; ++ax4_init) {
tensor1[((ax0_ax1_fused_ax2_fused * 4) + ax4_init)] = (int8_t)0;
}
for (int32_t rv0_rv1_fused = 0; rv0_rv1_fused < 64; ++rv0_rv1_fused) {
for (int32_t ax4 = 0; ax4 < 4; ++ax4) {
int32_t cse_var_1 = ((ax0_ax1_fused_ax2_fused * 4) + ax4);
tensor1[cse_var_1] = (tensor1[cse_var_1] +
placeholder[(((ax0_ax1_fused_ax2_fused * 256) + (rv0_rv1_fused * 4)) + ax4)]);
}
}
}
for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 16; ++ax0_ax1_fused) {
for (int32_t ax41 = 0; ax41 < 4; ++ax41) {
int32_t cse_var_2 = ((ax0_ax1_fused * 4) + ax41);
tensor[cse_var_2] = (((int8_t)tensor1[cse_var_2]) / 64);
}
}
return 0;
}
```
### Actual behavior
```c
TVM_DLL int32_t tvmgen_default_fused_nn_avg_pool2d(int8_t* placeholder,
int32_t* tensor) {
int8_t tensor1[64];
for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 16;
++ax0_ax1_fused_ax2_fused) {
for (int32_t ax4_init = 0; ax4_init < 4; ++ax4_init) {
tensor1[((ax0_ax1_fused_ax2_fused * 4) + ax4_init)] = (int8_t)0;
}
for (int32_t rv0_rv1_fused = 0; rv0_rv1_fused < 64; ++rv0_rv1_fused) {
for (int32_t ax4 = 0; ax4 < 4; ++ax4) {
int32_t cse_var_1 = ((ax0_ax1_fused_ax2_fused * 4) + ax4);
tensor1[cse_var_1] = (tensor1[cse_var_1] +
placeholder[(((ax0_ax1_fused_ax2_fused * 256) + (rv0_rv1_fused * 4)) + ax4)]);
}
}
}
for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 16; ++ax0_ax1_fused) {
for (int32_t ax41 = 0; ax41 < 4; ++ax41) {
int32_t cse_var_2 = ((ax0_ax1_fused * 4) + ax41);
tensor[cse_var_2] = (((int32_t)tensor1[cse_var_2]) / 64);
}
}
return 0;
}
```
### Environment
Our own tvm-fork, rebased on e0a0e67e55c036077fd07728ac28ecd7d2c9d72b
running on ubuntu 20.04
### Steps to reproduce
```python
def create_model():
x = relay.var("input", relay.TensorType((1,1,16,16), 'int8'))
x = relay.nn.avg_pool2d(x, pool_size=(2,2), strides=(2,2),
padding=[0,0,0,0])
x = relay.reshape(x, (1,8*8))
fc_weights_name = "fc_weights"
fc_weights_shape = (10,64)
fc_weights = relay.var(fc_weights_name,
relay.TensorType(fc_weights_shape, "int8"))
params = {fc_weights_name: tvm.nd.array(np.ones(fc_weights_shape,
dtype=np.int8))}
x = relay.nn.dense(x, fc_weights, out_dtype="int8")
# create an IR module from the relay expression
mod = tvm.ir.IRModule()
mod = mod.from_expr(x)
return mod, params
```
The mod and params can be compiled with TVMC and the C aot runtime
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]