Lunderberg commented on code in PR #11417:
URL: https://github.com/apache/tvm/pull/11417#discussion_r882050980


##########
python/tvm/topi/hexagon/utils.py:
##########
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm import te
+
+
+def n11c_1024c_2d(n, h, w, c):
+    return [n, h, w, c // 1024, te.AXIS_SEPARATOR, c % 1024]
+
+
+def n11c_1024c_1d(n, h, w, c):
+    return [n, h, w, c // 1024, c % 1024]
+
+
+def nhwc_8h2w32c2w_2d(n, h, w, c):
+    return [n, h // 8, w // 4, c // 32, te.AXIS_SEPARATOR, h % 8, (w % 4) // 
2, c % 32, w % 2]
+
+
+def nhwc_8h2w32c2w_1d(n, h, w, c):
+    return [n, h // 8, w // 4, c // 32, h % 8, (w % 4) // 2, c % 32, w % 2]
+
+
+def get_layout_transform_fn(layout):
+    if layout == "nhwc-8h2w32c2w-2d":
+        return nhwc_8h2w32c2w_2d
+    if layout == "nhwc-8h2w32c2w-1d":
+        return nhwc_8h2w32c2w_1d
+    elif layout == "n11c-1024c-2d":
+        return n11c_1024c_2d
+    elif layout == "n11c-1024c-1d":
+        return n11c_1024c_1d
+    else:
+        raise RuntimeError(f"Unexpected layout '{layout}'")
+
+
+def apply_transform(s, block, block_index: int, buffer_type: str, layout: str):
+    """Apply transform layout on a buffer
+
+    Parameters
+    ----------
+    s: Schedule
+    block : BlockRV
+        The block that accesses the target buffer
+    buffer_index: int
+        The index of the buffer in block's read or write region
+    buffer_type : str
+        Type of the buffer index, "read" or "write"
+    layout : str
+        Layout of the buffer
+    """
+    transform_fn = get_layout_transform_fn(layout)
+    if layout == "nhwc-8h2w32c2w-1d":
+        axis_separators = [4]
+    elif layout == "n11c-1024c-1d":
+        axis_separators = [2]
+    else:
+        raise RuntimeError(f"Unexpected layout '{layout}'")
+
+    s.transform_layout(block, block_index, buffer_type, transform_fn)

Review Comment:
   FYI, after #11269 lands, the calling `layout_transform` will also handle the 
call to `set_axis_separators`, so this function may become simpler or empty.



##########
python/tvm/topi/hexagon/slice_ops/avg_pool2d.py:
##########
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from tvm.script import tir as T
+from ..utils import apply_transform, get_layout_transform_fn
+
+
+# The slice op implementation for avg_pool2d makes serveral assumptions:
+# 1) Both input and output are a multiple of croutons, and the input is already
+#    padded for a given output shape as per any crouton and non-crouton related
+#    padding.
+# 2) The current implementation assumes 'count_include_pad' to be 'True'. It 
can
+#    modified to support 'False' but the element count for the pooling window 
must
+#    be pre-computed and provided as an input to reduce the run-time overhead.
+# 3) 'padding' is also ignored. It must be handled outside of the sliced op.
+# 4) Please note that this implementation will not work if the output was 
padded
+#    for the croutons. Since we loop over the logical output shape, this can 
result
+#    into out-of-bound access for the input.
+
+def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
+    kh, kw = kernel
+    rh = te.reduce_axis((0, kh), name="rh")
+    rw = te.reduce_axis((0, kw), name="rw")
+    ob, oh, ow, oc = out_shape
+    sh, sw = stride
+    dh, dw = dilation
+    Area = float(1) / (kh * kw)
+
+    Sum = te.compute(
+        out_shape,
+        lambda b, h, w, c: te.sum(
+            A[b, h * sh + dh * rh, w * sw + dw * rw, c].astype("float32"), 
axis=[rh, rw]
+        ),
+        name="sum",
+    )
+    Avg = te.compute(
+        out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * 
Area).astype(A.dtype), name="avg"

Review Comment:
   Nitpick: The name `Area` threw me a bit, as I initially thought `Area` 
should be the area of the kernel relative to a single value, rather than the 
area of a value relative to the kernel.  Can we rename `Area` to either 
`InvArea` or `NumValues`?



##########
python/tvm/topi/hexagon/slice_ops/avg_pool2d.py:
##########
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from tvm.script import tir as T
+from ..utils import apply_transform, get_layout_transform_fn
+
+
+# The slice op implementation for avg_pool2d makes serveral assumptions:
+# 1) Both input and output are a multiple of croutons, and the input is already
+#    padded for a given output shape as per any crouton and non-crouton related
+#    padding.
+# 2) The current implementation assumes 'count_include_pad' to be 'True'. It 
can
+#    modified to support 'False' but the element count for the pooling window 
must
+#    be pre-computed and provided as an input to reduce the run-time overhead.
+# 3) 'padding' is also ignored. It must be handled outside of the sliced op.
+# 4) Please note that this implementation will not work if the output was 
padded
+#    for the croutons. Since we loop over the logical output shape, this can 
result
+#    into out-of-bound access for the input.
+
+def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
+    kh, kw = kernel
+    rh = te.reduce_axis((0, kh), name="rh")
+    rw = te.reduce_axis((0, kw), name="rw")
+    ob, oh, ow, oc = out_shape
+    sh, sw = stride
+    dh, dw = dilation
+    Area = float(1) / (kh * kw)
+

Review Comment:
   Can we add validation to the `out_shape`, at least for static shapes?  Since 
the external handling of padding means that we can't compute `out_shape` from 
the other parameters, it would be good to validate that `out_shape` isn't too 
large.



##########
python/tvm/topi/hexagon/slice_ops/avg_pool2d.py:
##########
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from tvm.script import tir as T
+from ..utils import apply_transform, get_layout_transform_fn
+
+
+# The slice op implementation for avg_pool2d makes serveral assumptions:

Review Comment:
   Can this be moved from a comment to a docstring?



##########
tests/python/contrib/test_hexagon/infrastructure.py:
##########
@@ -228,3 +227,18 @@ def compute(n, ho, wo, ko, hi, wi, ki):
         )
 
     return output_shape, compute
+
+
+# Transpose and reshape numpy array according to the specified layout
+def transform_numpy(arr_np, layout):
+    if layout == "nhwc":
+        return arr_np
+    elif layout == "nhwc-8h2w32c2w":
+        N, H, W, C = arr_np.shape
+        return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 
32]).transpose(0, 1, 3, 6, 2, 4, 7, 5)
+    elif layout == "n11c-1024c":
+        N, H, W, C = arr_np.shape
+        assert (H == 1 and W == 1), "The size of H and W must be 1!"
+        return arr_np.reshape([N, C//1024, 1024]).transpose(0, 1, 2)

Review Comment:
   This doesn't agree with the definition given in 
`topi.hexagon.utils.n11c_1024c_1d`.  Even though it results in the same 
flattened shape when `H` and `W` are 1, they should still be included in order 
to pass the shape validation inserted in `MakePackedAPI`.  Also, the 
`.transpose` should only be required if the dimensions are being reordered.  
The reshape below should match the layout transform defined in 
`topi.hexagon.utils.n11c_1024c_1d`.
   
   ```python
   return arr_np.reshape([N, 1, 1, C//1024, 1024])
   ```



##########
python/tvm/topi/hexagon/slice_ops/avg_pool2d.py:
##########
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from tvm.script import tir as T
+from ..utils import apply_transform, get_layout_transform_fn
+
+
+# The slice op implementation for avg_pool2d makes serveral assumptions:
+# 1) Both input and output are a multiple of croutons, and the input is already
+#    padded for a given output shape as per any crouton and non-crouton related
+#    padding.
+# 2) The current implementation assumes 'count_include_pad' to be 'True'. It 
can
+#    modified to support 'False' but the element count for the pooling window 
must
+#    be pre-computed and provided as an input to reduce the run-time overhead.
+# 3) 'padding' is also ignored. It must be handled outside of the sliced op.
+# 4) Please note that this implementation will not work if the output was 
padded
+#    for the croutons. Since we loop over the logical output shape, this can 
result
+#    into out-of-bound access for the input.
+
+def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
+    kh, kw = kernel
+    rh = te.reduce_axis((0, kh), name="rh")
+    rw = te.reduce_axis((0, kw), name="rw")
+    ob, oh, ow, oc = out_shape
+    sh, sw = stride
+    dh, dw = dilation
+    Area = float(1) / (kh * kw)
+
+    Sum = te.compute(
+        out_shape,
+        lambda b, h, w, c: te.sum(
+            A[b, h * sh + dh * rh, w * sw + dw * rw, c].astype("float32"), 
axis=[rh, rw]
+        ),
+        name="sum",
+    )
+    Avg = te.compute(
+        out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * 
Area).astype(A.dtype), name="avg"
+    )
+    return Avg
+
+
+# Schedule for input and output layout nhwc-8h2w32c2w
+def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: 
str):
+    func = te.create_prim_func([ins, outs])
+    s = tir.Schedule(func)
+    Sum = s.get_block("sum")
+    Avg = s.get_block("avg")
+
+    apply_transform(s, Sum, 0, "read", input_layout)
+    apply_transform(s, Avg, 0, "write", output_layout)
+
+    # Schedule 'Sum'
+    bn, bh, bw, bc, rx, ry = s.get_loops(Sum)
+    bho, bhi = s.split(bh, [None, 8])
+    bwo, bwi = s.split(bw, [None, 4])
+    bwio, bwii = s.split(bwi, [None, 2])  # Doesn't seem to be doing anything
+    bco, bci = s.split(bc, [None, 32])
+    s.reorder(bn, bho, bwo, bco, bhi, bwio, rx, ry, bci, bwii)  # --- DOESN'T 
do anything
+    bci_wii = s.fuse(bci, bwii)  # --- DOESN'T do anything

Review Comment:
   Same question here, after fusing I see extents `T.grid(1, 1, 2, 1, 8, 2, 3, 
3, 64)` and can't reproduce the lack of effect.



##########
python/tvm/topi/hexagon/slice_ops/avg_pool2d.py:
##########
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from tvm.script import tir as T
+from ..utils import apply_transform, get_layout_transform_fn
+
+
+# The slice op implementation for avg_pool2d makes serveral assumptions:
+# 1) Both input and output are a multiple of croutons, and the input is already
+#    padded for a given output shape as per any crouton and non-crouton related
+#    padding.
+# 2) The current implementation assumes 'count_include_pad' to be 'True'. It 
can
+#    modified to support 'False' but the element count for the pooling window 
must
+#    be pre-computed and provided as an input to reduce the run-time overhead.
+# 3) 'padding' is also ignored. It must be handled outside of the sliced op.
+# 4) Please note that this implementation will not work if the output was 
padded
+#    for the croutons. Since we loop over the logical output shape, this can 
result
+#    into out-of-bound access for the input.
+
+def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
+    kh, kw = kernel
+    rh = te.reduce_axis((0, kh), name="rh")
+    rw = te.reduce_axis((0, kw), name="rw")
+    ob, oh, ow, oc = out_shape
+    sh, sw = stride
+    dh, dw = dilation
+    Area = float(1) / (kh * kw)
+
+    Sum = te.compute(
+        out_shape,
+        lambda b, h, w, c: te.sum(
+            A[b, h * sh + dh * rh, w * sw + dw * rw, c].astype("float32"), 
axis=[rh, rw]
+        ),
+        name="sum",
+    )
+    Avg = te.compute(
+        out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * 
Area).astype(A.dtype), name="avg"
+    )
+    return Avg
+
+
+# Schedule for input and output layout nhwc-8h2w32c2w
+def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: 
str):
+    func = te.create_prim_func([ins, outs])
+    s = tir.Schedule(func)
+    Sum = s.get_block("sum")
+    Avg = s.get_block("avg")
+
+    apply_transform(s, Sum, 0, "read", input_layout)
+    apply_transform(s, Avg, 0, "write", output_layout)
+
+    # Schedule 'Sum'
+    bn, bh, bw, bc, rx, ry = s.get_loops(Sum)
+    bho, bhi = s.split(bh, [None, 8])
+    bwo, bwi = s.split(bw, [None, 4])
+    bwio, bwii = s.split(bwi, [None, 2])  # Doesn't seem to be doing anything
+    bco, bci = s.split(bc, [None, 32])
+    s.reorder(bn, bho, bwo, bco, bhi, bwio, rx, ry, bci, bwii)  # --- DOESN'T 
do anything

Review Comment:
   What do you have before and after these lines?  Running the test case 
`test_avg_pool2d_slice.py::TestAvgPool2dSlice::test_avg_pool2d_slice[nhwc-8h2w32c2w-False-str
   
ide0-kernel0-float16-dilation0-padding0-True-nhwc-8h2w32c2w-output_shape0-False]`
 and using `print(s.mod.script())`, I can see the loopnest before this line to 
have extents `T.grid(1, 1, 8, 2, 2, 2, 1, 32, 3, 3)` and afterward to have 
extents `T.grid(1, 1, 2, 1, 8, 2, 3, 3, 32, 2)`, so it does look like the 
reorder is having an effect.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to