junrushao1994 commented on code in PR #12246:
URL: https://github.com/apache/tvm/pull/12246#discussion_r933939884
##########
tests/python/unittest/test_meta_schedule_space_cpu.py:
##########
@@ -1201,6 +1201,180 @@ def gmm_2(X: T.Buffer[(1, 128, 128), "float32"], Y:
T.Buffer[(1, 128, 128), "flo
)
+def test_cpu_grp():
+ # fmt: off
+ @T.prim_func
+ def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight:
T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128),
"float32"]) -> None:
+ # function attr dict
+ T.func_attr({"global_symbol": "main", "tir.noalias": True})
+ # body
+ with T.block("root"):
+ T.reads()
+ T.writes()
+ T.block_attr({"meta_schedule.parallel":288,
"meta_schedule.unroll_explicit":16, "meta_schedule.vectorize":64})
+ PadInput = T.alloc_buffer([1, 58, 58, 64], dtype="float32")
+ conv2d_nhwc_global = T.alloc_buffer([1, 28, 28, 128],
dtype="float32")
+ for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 7, 1, 2):
+ for ax0, ax1, ax2, ax3 in T.grid(1, 9, 57, 32):
+ with T.block("PadInput"):
+ i0 = T.axis.spatial(1, ax0)
+ i1 = T.axis.spatial(58, i1_0 * 8 + ax1)
+ i2 = T.axis.spatial(58, ax2)
+ i3 = T.axis.spatial(64, i3_0 * 32 + ax3)
+ T.reads(inputs[i0, i1 - 1, i2 - 1, i3])
+ T.writes(PadInput[i0, i1, i2, i3])
+ PadInput[i0, i1, i2, i3] = T.if_then_else(1 <= i1 and
i1 < 57 and 1 <= i2 and i2 < 57, inputs[i0, i1 - 1, i2 - 1, i3], T.float32(0),
dtype="float32")
+ for i0_1, i1_1, i2_1, i3_1 in T.grid(1, 4, 1, 1):
+ for i4_0, i5_0, i6_0, i0_2, i1_2, i2_2, i3_2, i4_1, i5_1,
i6_1, i0_3, i1_3, i2_3, i3_3 in T.grid(1, 3, 8, 1, 1, 4, 4, 3, 1, 2, 1, 1, 7,
16):
+ with T.block("conv2d_nhwc"):
+ n = T.axis.spatial(1, i0_3 + i0_0 + i0_1 + i0_2)
+ h = T.axis.spatial(28, i1_0 * 4 + i1_1 + i1_2 +
i1_3)
+ w = T.axis.spatial(28, i2_0 * 28 + i2_1 * 28 +
i2_2 * 7 + i2_3)
+ co = T.axis.spatial(128, i3_0 * 64 + i3_1 * 64 +
i3_2 * 16 + i3_3)
+ rh = T.axis.reduce(3, i4_0 * 3 + i4_1)
+ rw = T.axis.reduce(3, i5_0 + i5_1)
+ rc = T.axis.reduce(16, i6_0 * 2 + i6_1)
+ T.reads(PadInput[n, h * 2 + rh, w * 2 + rw, co //
32 * 16 + rc], weight[rh, rw, rc, co])
+ T.writes(conv2d_nhwc_global[n, h, w, co])
+
T.block_attr({"meta_schedule.tiling_structure":"SSRSRS"})
+ with T.init():
+ conv2d_nhwc_global[n, h, w, co] = T.float32(0)
+ conv2d_nhwc_global[n, h, w, co] =
conv2d_nhwc_global[n, h, w, co] + PadInput[n, h * 2 + rh, w * 2 + rw, co // 32
* 16 + rc] * weight[rh, rw, rc, co]
+ for ax0, ax1, ax2, ax3 in T.grid(1, 1, 28, 64):
+ with T.block("conv2d_nhwc_global"):
+ v0 = T.axis.spatial(1, ax0)
+ v1 = T.axis.spatial(28, i1_0 * 4 + i1_1 + ax1)
+ v2 = T.axis.spatial(28, ax2)
+ v3 = T.axis.spatial(128, i3_0 * 64 + ax3)
+ T.reads(conv2d_nhwc_global[v0, v1, v2, v3])
+ T.writes(conv2d_nhwc[v0, v1, v2, v3])
+ conv2d_nhwc[v0, v1, v2, v3] =
conv2d_nhwc_global[v0, v1, v2, v3]
+ @T.prim_func
Review Comment:
It's probably not a problem here, but if those functions are in global
scope, I would prefer adding blank lines in between.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]