zxybazh commented on issue #10899:
URL: https://github.com/apache/tvm/issues/10899#issuecomment-1088152512
Here is the generated TIR script.
```
# from tvm.script import tir as T
@tvm.script.ir_module
class Module:
    # Generated TIR module quoted from a bug report; the statements below are
    # the printer output with mail-wrapped lines rejoined and nesting restored.
    # Pipeline as written: per-element partial buffer -> sum over 49 -> scale.
    # NOTE(review): the 7x7 -> 1x1 shape plus the final 1/49 scale looks like a
    # global average pool -- confirm against the originating operator.
    @T.prim_func
    def main(
        placeholder: T.Buffer[(1, 512, 7, 7), "float32"],
        tensor: T.Buffer[(1, 512, 1, 1), "float32"],
    ) -> None:
        # function attr dict
        T.func_attr({"global_symbol": "main", "tir.noalias": True})
        # body
        # with T.block("root")
        tensor_1 = T.alloc_buffer([1, 512, 1, 1], dtype="float32")
        # Same shape as tensor_1 plus a trailing axis of extent 49 (= 7 * 7).
        tensor_1_rf = T.alloc_buffer([1, 512, 1, 1, 49], dtype="float32")

        # Stage 1: write one partial value per (channel, fused 7x7 position).
        # NOTE(review): the last loop variable (extent 1) is never referenced
        # inside the block, and ax0/ax2/ax3 are bound to the constant 0 rather
        # than to i0/i2/i3.
        for i0, i1, i2, i3, i4_i5_fused_0_i4_i5_fused_1_fused_0, i4_i5_fused_0_i4_i5_fused_1_fused_1 in T.grid(1, 512, 1, 1, 49, 1):
            with T.block("tensor_rf"):
                vi4_i5_fused_0_i4_i5_fused_1_fused_0 = T.axis.spatial(
                    49, i4_i5_fused_0_i4_i5_fused_1_fused_0
                )
                ax0 = T.axis.spatial(1, 0)
                ax1 = T.axis.spatial(512, i1)
                ax2 = T.axis.spatial(1, 0)
                ax3 = T.axis.spatial(1, 0)
                # `// 7` and `% 7` split the fused 0..48 index into the two
                # trailing coordinates of `placeholder`.
                T.reads(
                    placeholder[
                        ax0,
                        ax1,
                        ax2 * 7 + vi4_i5_fused_0_i4_i5_fused_1_fused_0 // 7,
                        ax3 * 7 + vi4_i5_fused_0_i4_i5_fused_1_fused_0 % 7,
                    ]
                )
                T.writes(
                    tensor_1_rf[ax0, ax1, ax2, ax3, vi4_i5_fused_0_i4_i5_fused_1_fused_0]
                )
                # NOTE(review): every axis of this block is declared spatial,
                # yet the block still carries a T.init() -- kept exactly as
                # generated.
                with T.init():
                    tensor_1_rf[
                        ax0, ax1, ax2, ax3, vi4_i5_fused_0_i4_i5_fused_1_fused_0
                    ] = T.float32(0)
                tensor_1_rf[
                    ax0, ax1, ax2, ax3, vi4_i5_fused_0_i4_i5_fused_1_fused_0
                ] = (
                    tensor_1_rf[ax0, ax1, ax2, ax3, vi4_i5_fused_0_i4_i5_fused_1_fused_0]
                    + placeholder[
                        ax0,
                        ax1,
                        ax2 * 7 + vi4_i5_fused_0_i4_i5_fused_1_fused_0 // 7,
                        ax3 * 7 + vi4_i5_fused_0_i4_i5_fused_1_fused_0 % 7,
                    ]
                )

        # Stage 2: accumulate the 49 partial values into tensor_1; here the
        # fused axis is declared as a reduce axis.
        for i0, i1, i2, i3, i4_i5_fused_0_i4_i5_fused_1_fused_0, i4_i5_fused_0_i4_i5_fused_1_fused_1 in T.grid(1, 512, 1, 1, 49, 1):
            with T.block("tensor"):
                vi4_i5_fused_0_i4_i5_fused_1_fused_0 = T.axis.reduce(
                    49, i4_i5_fused_0_i4_i5_fused_1_fused_0
                )
                ax0 = T.axis.spatial(1, 0)
                ax1 = T.axis.spatial(512, i1)
                ax2 = T.axis.spatial(1, 0)
                ax3 = T.axis.spatial(1, 0)
                T.reads(
                    tensor_1_rf[ax0, ax1, ax2, ax3, vi4_i5_fused_0_i4_i5_fused_1_fused_0]
                )
                T.writes(tensor_1[ax0, ax1, ax2, ax3])
                with T.init():
                    tensor_1[ax0, ax1, ax2, ax3] = T.float32(0)
                tensor_1[ax0, ax1, ax2, ax3] = (
                    tensor_1[ax0, ax1, ax2, ax3]
                    + tensor_1_rf[ax0, ax1, ax2, ax3, vi4_i5_fused_0_i4_i5_fused_1_fused_0]
                )

        # Stage 3: scale each accumulated value; the constant equals 1 / 49.
        for i0, i1, i2, i3 in T.grid(1, 512, 1, 1):
            with T.block("tensor_1"):
                ax0, ax1, ax2, ax3 = T.axis.remap("SSSS", [i0, i1, i2, i3])
                T.reads(tensor_1[ax0, ax1, ax2, ax3])
                T.writes(tensor[ax0, ax1, ax2, ax3])
                tensor[ax0, ax1, ax2, ax3] = (
                    tensor_1[ax0, ax1, ax2, ax3] * T.float32(0.020408163265306121)
                )
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]