[incubator-tvm] branch master updated: add dilation in x86 NCHWc depthwise conv support (#4962) (#6267)

masahi Thu, 13 Aug 2020 22:17:54 -0700

This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git



The following commit(s) were added to refs/heads/master by this push:
     new ad0dbe0  add dilation in x86 NCHWc depthwise conv support (#4962) (#6267)
ad0dbe0 is described below

commit ad0dbe0332c05c80152aa1eb274aad591229231a
Author: wjliu <[email protected]>
AuthorDate: Fri Aug 14 13:17:38 2020 +0800

    add dilation in x86 NCHWc depthwise conv support (#4962) (#6267)
---
 python/tvm/topi/x86/depthwise_conv2d.py                | 18 ++++++++++++------
 tests/python/frontend/pytorch/test_forward.py          |  7 +------
 tests/python/topi/python/test_topi_depthwise_conv2d.py |  7 ++++---
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/python/tvm/topi/x86/depthwise_conv2d.py b/python/tvm/topi/x86/depthwise_conv2d.py
index 0976c33..acbe0f7 100644
--- a/python/tvm/topi/x86/depthwise_conv2d.py
+++ b/python/tvm/topi/x86/depthwise_conv2d.py
@@ -122,13 +122,18 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
 
     strides = strides if isinstance(strides, (tuple, list)) else (strides, strides)
     HSTR, WSTR = strides
-    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(padding, (filter_height, filter_width))
 
     dh, dw = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation)
-    assert (dh, dw) == (1, 1), "Does not support dilation"
 
-    out_height = (in_height - filter_height + pad_top + pad_down) // HSTR + 1
-    out_width = (in_width - filter_width + pad_left + pad_right) // WSTR + 1
+    dilated_kernel_h = (filter_height - 1) * dh + 1
+    dilated_kernel_w = (filter_width - 1) * dw + 1
+    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
+        padding, (dilated_kernel_h, dilated_kernel_w))
+    HPAD = pad_top + pad_down
+    WPAD = pad_left + pad_right
+
+    out_height = (in_height + HPAD - dilated_kernel_h) // HSTR + 1
+    out_width = (in_width + WPAD - dilated_kernel_w) // WSTR + 1
 
     cfg.define_split("tile_ic", in_channel, num_outputs=2)
     cfg.define_split("tile_oc", out_channel, num_outputs=2)
@@ -140,7 +145,7 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
         te.placeholder((batch, in_channel, in_height, in_width), dtype=data.dtype),
         te.placeholder((out_channel, channel_multiplier, filter_height, filter_width),
                        dtype=kernel.dtype),
-        strides, padding, out_dtype)
+        strides, (pad_top, pad_down), out_dtype)
     if cfg.is_fallback:
         _fallback_schedule(cfg, wkl)
 
@@ -172,6 +177,7 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
     else:
         data_pad = data
 
+
     # depthconv stage
     idxdiv = tvm.tir.indexdiv
     idxmod = tvm.tir.indexmod
@@ -184,7 +190,7 @@ def depthwise_conv2d_NCHWc(cfg, data, kernel, strides, padding, dilation,
             (data_pad[
                 b,
                 idxdiv(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block),
-                oh*HSTR+kh, ow*WSTR+kw,
+                oh*HSTR+kh*dh, ow*WSTR+kw*dw,
                 idxmod(idxdiv(oco * out_channel_block + oci, channel_multiplier), in_channel_block)]
              .astype(out_dtype) *
              kernel[oco, 0, kh, kw, 0, oci].astype(out_dtype)),
diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index ae03a70..88203f5 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -1552,12 +1552,7 @@ def test_segmentaton_models():
     inp = [torch.rand((1, 3, 300, 300), dtype=torch.float)]
 
     verify_model(SegmentationModelWrapper(fcn.eval()), inp, atol=1e-4, rtol=1e-4)
-
-    # depthwise + dilated covolution not supported on x86
-    # see https://github.com/apache/incubator-tvm/issues/4962
-    cuda_ctx = ("cuda", tvm.gpu(0))
-    if cuda_ctx[1].exist:
-        verify_model(SegmentationModelWrapper(deeplab.eval()), inp, [cuda_ctx], atol=1e-4, rtol=1e-4)
+    verify_model(SegmentationModelWrapper(deeplab.eval()), inp, atol=1e-4, rtol=1e-4)
 
 
 def test_3d_models():
diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py
index 93a166d..5497e11 100644
--- a/tests/python/topi/python/test_topi_depthwise_conv2d.py
+++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py
@@ -269,7 +269,6 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m
     filter_width = filter_height
     stride_h = stride_w = stride
 
-    assert dilation == 1, "depthwise_conv2d_NCHWc currently does not support dilation."
     assert channel_multiplier == 1, "depthwise_conv2d_NCHWc currently does not support channel multiplier > 1."
     pad_h, pad_w, _, _ = get_pad_tuple(padding, (filter_height, filter_width))
     padding_args = (pad_h, pad_w)
@@ -307,7 +306,7 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m
             # declare
             DepthwiseConv2d = topi.x86.depthwise_conv2d_NCHWc(Input, Filter,
                                                               (stride_h, stride_w),
-                                                              padding_args,
+                                                              padding,
                                                               (dilation, dilation),
                                                               in_layout,
                                                               out_layout, dtype)
@@ -330,8 +329,9 @@ def depthwise_conv2d_with_workload_NCHWc(batch, in_channel, in_height, channel_m
             input_np = np.random.uniform(size=input_shape).astype(dtype)
             filter_np = np.random.uniform(size=filter_shape).astype(dtype)
             # correctness with scipy
+            dw_np = tvm.topi.testing.dilate_python(filter_np, (1, 1, dilation, dilation)).astype(dtype)
             depthwise_conv2d_scipy = tvm.topi.testing.depthwise_conv2d_python_nchw(
-                input_np, filter_np, stride, padding)
+                input_np, dw_np, stride, padding)
             relu_scipy = np.maximum(depthwise_conv2d_scipy, 0)
             return (_transform_data(input_np, ic_block),
                     _transform_kernel(filter_np, oc_block),
@@ -390,6 +390,7 @@ def test_depthwise_conv2d():
     # depthwise_conv2d_with_workload_nhwc(1, 728, 64, 1, 3, 1, "SAME", dilation=2)
 
     # NCHW[x]c
+    depthwise_conv2d_with_workload_NCHWc(1, 728, 32, 1, 3, 1, "SAME", dilation=2)
     depthwise_conv2d_with_workload_NCHWc(1, 728, 32, 1, 3, 1, "SAME")
     depthwise_conv2d_with_workload_NCHWc(1, 728, 32, 1, 3, 1, "VALID")

[incubator-tvm] branch master updated: add dilation in x86 NCHWc depthwise conv support (#4962) (#6267)

Reply via email to