This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 6f600f1 [Refactor] Remove dead code from depthwise_conv2d for Intel
graphics (#8381)
6f600f1 is described below
commit 6f600f1a52c07cea4c1c19c0f017719be76974eb
Author: Egor Churaev <[email protected]>
AuthorDate: Fri Jul 2 09:30:50 2021 +0300
[Refactor] Remove dead code from depthwise_conv2d for Intel graphics (#8381)
After the fix in a66186b, I saw that the same fix should be applied to
depthwise_conv2d for Intel graphics. I noticed that the removed code was
never used and is simply a copy of the code in
cuda/depthwise_conv2d.py, so we can fall back to the cuda
implementation when it becomes necessary.
---
python/tvm/topi/intel_graphics/depthwise_conv2d.py | 183 ---------------------
1 file changed, 183 deletions(-)
diff --git a/python/tvm/topi/intel_graphics/depthwise_conv2d.py
b/python/tvm/topi/intel_graphics/depthwise_conv2d.py
index fabd63b..02af465 100644
--- a/python/tvm/topi/intel_graphics/depthwise_conv2d.py
+++ b/python/tvm/topi/intel_graphics/depthwise_conv2d.py
@@ -20,7 +20,6 @@ import tvm
from tvm import te
from tvm import autotvm
from ..utils import traverse_inline
-from .. import tag
from .. import nn
from ..nn.depthwise_conv2d import depthwise_conv2d_infer_layout
@@ -136,188 +135,6 @@ def schedule_depthwise_conv2d_nchw(cfg, outs):
return s
-def schedule_depthwise_conv2d_nhwc(outs):
- """Schedule for depthwise_conv2d nhwc forward.
-
- Parameters
- ----------
- outs: Array of Tensor
- The computation graph description of depthwise_conv2d
- in the format of an array of tensors.
-
- Returns
- -------
- s: Schedule
- The computation schedule for depthwise_conv2d nhwc.
- """
- outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
- s = te.create_schedule([x.op for x in outs])
-
- def _schedule(temp, Filter, DepthwiseConv2d):
- s[temp].compute_inline()
- FS = s.cache_read(Filter, "shared", [DepthwiseConv2d])
- if DepthwiseConv2d.op in s.outputs:
- Output = DepthwiseConv2d
- CL = s.cache_write(DepthwiseConv2d, "local")
- else:
- Output = outs[0].op.output(0)
- s[DepthwiseConv2d].set_scope("local")
-
- block_x = te.thread_axis("blockIdx.x")
- thread_x = te.thread_axis("threadIdx.x")
-
- b, h, w, c = s[Output].op.axis
-
- # num_thread here could be 728, it is larger than cuda.max_num_threads
- num_thread = tvm.arith.Analyzer().simplify(temp.shape[3]).value
- target = tvm.target.Target.current()
- if target and (target.kind.name not in ["cuda", "nvptx"]):
- num_thread = target.max_num_threads
- xoc, xic = s[Output].split(c, factor=num_thread)
- s[Output].reorder(xoc, b, h, w, xic)
- xo, yo, _, _ = s[Output].tile(h, w, x_factor=2, y_factor=2)
- fused = s[Output].fuse(yo, xo)
- fused = s[Output].fuse(fused, b)
- fused = s[Output].fuse(fused, xoc)
-
- s[Output].bind(fused, block_x)
- s[Output].bind(xic, thread_x)
-
- if DepthwiseConv2d.op in s.outputs:
- s[CL].compute_at(s[Output], xic)
- else:
- s[DepthwiseConv2d].compute_at(s[Output], xic)
-
- _, _, ci, fi = s[FS].op.axis
- s[FS].compute_at(s[Output], fused)
- fused = s[FS].fuse(fi, ci)
- s[FS].bind(fused, thread_x)
-
- scheduled_ops = []
-
- def traverse(OP):
- """Internal traverse function"""
- # inline all one-to-one-mapping operators except the last stage
(output)
- if tag.is_broadcast(OP.tag):
- if OP not in s.outputs:
- s[OP].compute_inline()
- for tensor in OP.input_tensors:
- if tensor.op.input_tensors and tensor.op not in scheduled_ops:
- traverse(tensor.op)
- # schedule depthwise_conv2d
- if OP.tag == "depthwise_conv2d_nhwc":
- PaddedInput = OP.input_tensors[0]
- Filter = OP.input_tensors[1]
- if isinstance(Filter.op, tvm.te.ComputeOp) and "dilate" in
Filter.op.tag:
- s[Filter].compute_inline()
- DepthwiseConv2d = OP.output(0)
- _schedule(PaddedInput, Filter, DepthwiseConv2d)
-
- scheduled_ops.append(OP)
-
- traverse(outs[0].op)
- return s
-
-
-def schedule_depthwise_conv2d_backward_input_nhwc(outs):
- """Schedule for depthwise_conv2d nhwc backward wrt input.
-
- Parameters
- ----------
- outs: Array of Tensor
- The computation graph description of depthwise_conv2d
- backward wrt input in the format of an array of tensors.
-
- Returns
- -------
- s: Schedule
- The computation schedule for depthwise_conv2d backward
- wrt input with layout nhwc.
- """
- outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
- s = te.create_schedule([x.op for x in outs])
-
- def _schedule(Padded_out_grad, In_grad):
- s[Padded_out_grad].compute_inline()
-
- block_x = te.thread_axis("blockIdx.x")
- thread_x = te.thread_axis("threadIdx.x")
- _, h, w, c = In_grad.op.axis
-
- fused_hwc = s[In_grad].fuse(h, w, c)
- xoc, xic = s[In_grad].split(fused_hwc, factor=128)
-
- s[In_grad].bind(xoc, block_x)
- s[In_grad].bind(xic, thread_x)
-
- def traverse(OP):
- # inline all one-to-one-mapping operators except the last stage
(output)
- if OP.tag == "depthwise_conv2d_backward_input_nhwc":
- Padded_out_grad = OP.input_tensors[0]
- Dilated_out_grad = Padded_out_grad.op.input_tensors[0]
- s[Dilated_out_grad].compute_inline()
- In_grad = OP.output(0)
- _schedule(Padded_out_grad, In_grad)
- else:
- raise ValueError("Depthwise conv backward wrt input for non-NHWC
is not supported.")
-
- traverse(outs[0].op)
- return s
-
-
-def schedule_depthwise_conv2d_backward_weight_nhwc(outs):
- """Schedule for depthwise_conv2d nhwc backward wrt weight.
-
- Parameters
- ----------
- outs: Array of Tensor
- The computation graph description of depthwise_conv2d
- backward wrt weight in the format of an array of tensors.
-
- Returns
- -------
- s: Schedule
- The computation schedule for depthwise_conv2d backward
- wrt weight with layout nhwc.
- """
- outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
- s = te.create_schedule([x.op for x in outs])
-
- def _schedule(Weight_grad):
- block_x = te.thread_axis("blockIdx.x")
- thread_y = te.thread_axis("threadIdx.y")
- thread_x = te.thread_axis("threadIdx.x")
-
- db, dh, dw = Weight_grad.op.reduce_axis
-
- fused_dbdhdw = s[Weight_grad].fuse(db, dh, dw)
- _, ki = s[Weight_grad].split(fused_dbdhdw, factor=8)
- BF = s.rfactor(Weight_grad, ki)
-
- fused_fwcm = s[Weight_grad].fuse(*s[Weight_grad].op.axis)
-
- xo, xi = s[Weight_grad].split(fused_fwcm, factor=32)
-
- s[Weight_grad].bind(xi, thread_x)
- s[Weight_grad].bind(xo, block_x)
-
- s[Weight_grad].bind(s[Weight_grad].op.reduce_axis[0], thread_y)
- s[BF].compute_at(s[Weight_grad], s[Weight_grad].op.reduce_axis[0])
-
- def traverse(OP):
- # inline all one-to-one-mapping operators except the last stage
(output)
- if OP.tag == "depthwise_conv2d_backward_weight_nhwc":
- Padded_in = OP.input_tensors[1]
- s[Padded_in].compute_inline()
- Weight_grad = OP.output(0)
- _schedule(Weight_grad)
- else:
- raise ValueError("Depthwise conv backward wrt weight for non-NHWC
is not supported.")
-
- traverse(outs[0].op)
- return s
-
-
@depthwise_conv2d_infer_layout.register("intel_graphics")
def _depthwise_conv2d_infer_layout(workload, _):
"""Infer input/output shapes and layouts from a workload and cfg.