This is an automated email from the ASF dual-hosted git repository.
comaniac pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/master by this push:
new e78aa61 dynamic conv2d for cuda (#6598)
e78aa61 is described below
commit e78aa611274f9a946ae7243fe68427b55d5ddd18
Author: Zhi <[email protected]>
AuthorDate: Thu Oct 1 00:46:56 2020 -0700
dynamic conv2d for cuda (#6598)
---
include/tvm/tir/expr.h | 6 ++++--
python/tvm/topi/cuda/conv2d_direct.py | 4 +++-
python/tvm/topi/cuda/conv2d_nhwc_winograd.py | 14 ++++++++++++--
python/tvm/topi/cuda/conv2d_winograd.py | 16 ++++++++++++++--
src/tir/ir/expr.cc | 6 +++++-
tests/python/relay/test_any.py | 6 +++---
6 files changed, 41 insertions(+), 11 deletions(-)
diff --git a/include/tvm/tir/expr.h b/include/tvm/tir/expr.h
index 9e6f440..eee0dee 100644
--- a/include/tvm/tir/expr.h
+++ b/include/tvm/tir/expr.h
@@ -1085,9 +1085,11 @@ class Reduce : public PrimExpr {
/*! \brief Any shape. */
class AnyNode : public PrimExprNode {
public:
- void VisitAttrs(AttrVisitor* v) {}
+ void VisitAttrs(AttrVisitor* v) { v->Visit("dtype", &dtype); }
- bool SEqualReduce(const AnyNode* other, SEqualReducer equal) const { return true; }
+ bool SEqualReduce(const AnyNode* other, SEqualReducer equal) const {
+ return equal(dtype, other->dtype);
+ }
void SHashReduce(SHashReducer hash_reduce) const {}
diff --git a/python/tvm/topi/cuda/conv2d_direct.py b/python/tvm/topi/cuda/conv2d_direct.py
index 2065ab9..e1f3d82 100644
--- a/python/tvm/topi/cuda/conv2d_direct.py
+++ b/python/tvm/topi/cuda/conv2d_direct.py
@@ -117,4 +117,6 @@ def schedule_direct_cuda(cfg, s, conv):
N, CO, OH, OW = get_const_tuple(output.shape)
_, KH, KW, CI = get_const_tuple(kernel.shape)
- cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)
+
+ if isinstance(N, int):
+ cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)
diff --git a/python/tvm/topi/cuda/conv2d_nhwc_winograd.py b/python/tvm/topi/cuda/conv2d_nhwc_winograd.py
index cc0bbeb..246437a 100644
--- a/python/tvm/topi/cuda/conv2d_nhwc_winograd.py
+++ b/python/tvm/topi/cuda/conv2d_nhwc_winograd.py
@@ -302,6 +302,15 @@ def nhwc_winograd_cuda(
tile_size = _infer_tile_size(data, kernel)
N, H, W, CI = get_const_tuple(data.shape)
+ if isinstance(N, tvm.tir.Any):
+ N = tvm.te.size_var("n")
+
+ if not isinstance(H, int) or not isinstance(W, int):
+ raise RuntimeError(
+ "cuda winograd nhwc conv2d doesn't support dynamic \
+ input height or width."
+ )
+
if isinstance(dilation, int):
dilation_h = dilation_w = dilation
else:
@@ -330,7 +339,7 @@ def nhwc_winograd_cuda(
H = (H + pt + pb - KH) // HSTR + 1
W = (W + pl + pr - KW) // WSTR + 1
nH, nW = (H + m - 1) // m, (W + m - 1) // m
- P = N * nH * nW
+ P = N * nH * nW if isinstance(N, int) else nH * nW
# Determine whether the shape is available with tensorcore
shape_judge = (
@@ -432,7 +441,8 @@ def nhwc_winograd_cuda(
name="output",
tag="conv2d_nhwc_winograd",
)
- cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
+ if isinstance(N, int):
+ cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
return output
diff --git a/python/tvm/topi/cuda/conv2d_winograd.py b/python/tvm/topi/cuda/conv2d_winograd.py
index 69513d5..11502e1 100644
--- a/python/tvm/topi/cuda/conv2d_winograd.py
+++ b/python/tvm/topi/cuda/conv2d_winograd.py
@@ -44,6 +44,15 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, pre_
N, CI, H, W = get_const_tuple(data.shape)
+ if isinstance(N, tvm.tir.Any):
+ N = tvm.te.size_var("n")
+
+ if not isinstance(H, int) or not isinstance(W, int):
+ raise RuntimeError(
+ "cuda winograd conv2d doesn't support dynamic input\
+ height or width."
+ )
+
if isinstance(dilation, int):
dilation_h = dilation_w = dilation
else:
@@ -73,7 +82,8 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, pre_
H = (H + pt + pb - KH) // HSTR + 1
W = (W + pl + pr - KW) // WSTR + 1
nH, nW = (H + m - 1) // m, (W + m - 1) // m
- P = N * nH * nW
+
+ P = N * nH * nW if isinstance(N, int) else nH * nW
# transform kernel
if not pre_computed:
@@ -141,7 +151,9 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, out_dtype, pre_
name="output",
tag="conv2d_nchw_winograd",
)
- cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
+
+ if isinstance(N, int):
+ cfg.add_flop(2 * N * CO * H * W * CI * KH * KW)
return output
diff --git a/src/tir/ir/expr.cc b/src/tir/ir/expr.cc
index 687dfd6..f648aca 100644
--- a/src/tir/ir/expr.cc
+++ b/src/tir/ir/expr.cc
@@ -908,7 +908,11 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
});
// Any
-Any::Any() { data_ = make_object<AnyNode>(); }
+Any::Any() {
+ auto n = make_object<AnyNode>();
+ n->dtype = DataType::Int(32);
+ data_ = std::move(n);
+}
TVM_REGISTER_GLOBAL("tir.Any").set_body_typed([]() { return Any(); });
diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py
index c13f679..b9b58a5 100644
--- a/tests/python/relay/test_any.py
+++ b/tests/python/relay/test_any.py
@@ -444,7 +444,7 @@ def verify_any_conv2d(
# TODO(@kevinthesun): Support dynamic input height and width.
-# TODO(@kevinthesun): Support gpu to enable gpu tests.
+@tvm.testing.uses_gpu
def test_any_conv2d():
verify_any_conv2d(
(relay.Any(), 64, 224, 224),
@@ -501,7 +501,7 @@ def verify_any_conv2d_NCHWc(
# TODO(@kevinthesun): Support dynamic input height and width.
-# TODO(@kevinthesun): Support gpu to enable gpu tests.
+@tvm.testing.uses_gpu
def test_any_conv2d_NCHWc():
verify_any_conv2d_NCHWc(
(relay.Any(), 8, 224, 224, 8),
@@ -563,7 +563,7 @@ def verify_any_conv2d_transpose_nchw(
# TODO(@kevinthesun): Support dynamic input height and width.
-# TODO(@kevinthesun): Support gpu to enable gpu tests.
+@tvm.testing.uses_gpu
def test_any_conv2d_transpose_nchw():
verify_any_conv2d_transpose_nchw(
(relay.Any(), 64, 224, 224),