comaniac commented on a change in pull request #6395:
URL: https://github.com/apache/incubator-tvm/pull/6395#discussion_r492380095



##########
File path: CMakeLists.txt
##########
@@ -76,6 +76,8 @@ tvm_option(USE_COREML "Build with coreml support" OFF)
 tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF)
 tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF)
 tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library 
graph runtime" OFF)
+tvm_option(USE_TENSORRT "Build with TensorRT" OFF)

Review comment:
       The message is a bit confusing. `USE_TENSORRT` means enabling the 
TensorRT codegen for graph partitioning. It doesn't require TensorRT to be 
available in the system environment. IIUC, maybe it's better to say "Build with 
TensorRT codegen", although I just found that "Build with Arm Compute Library" 
has the same issue.
   
   @lhutton1 could you also share your thoughts for this?

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch

Review comment:
       Do we need a guard here as in L52?

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,671 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+import os
+import logging
+import numpy as np
+
+# Version to use for annotation when there is no linked TRT.
+#TENSORRT_VERSION = (6, 0, 1)
+#USE_IMPLICIT_BATCH = True
+#REMOVE_NO_MAC_SUBGRAPHS = False
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+def partition_for_tensorrt(mod, params=None, version=None, 
use_implicit_batch=True,
+                           remove_no_mac_subgraphs=False, max_workspace_size=1 
<< 30):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]
+        TensorRT version to target as tuple of (major, minor, patch). If TVM 
is compiled with
+        USE_TENSORRT_GRAPH_RUNTIME=ON, the linked TensorRT version will be 
used instead.
+    use_implicit_batch : Optional[bool]
+        Use TensorRT implicit batch mode (default true). Setting to false will 
enable explicit batch
+        mode which will widen supported operators to include those which 
modify the batch dimension,
+        but may reduce performance for some models.
+    remove_no_mac_subgraphs : Optional[bool]
+        Removes subgraphs which have been partitioned for TensorRT if they do 
not have any
+        multiply-accumulate operations. The removed subgraphs will go through 
TVM's standard
+        compilation instead. Can improve performance.
+    max_workspace_size : Optional[int]
+        How many bytes of workspace size to allow each subgraph to use for 
TensorRT engine creation.
+        See TensorRT documentation for more info.
+    Returns
+    -------
+    mod : annotated and partitioned module.
+    config : "relay.ext.tensorrt.options" configuration which should be given 
to PassContext when building.
+    """
+    config = {
+        "use_implicit_batch": use_implicit_batch,
+        "max_workspace_size": max_workspace_size,
+        "remove_no_mac_subgraphs": remove_no_mac_subgraphs
+    }
+    if version:
+        assert isinstance(version, tuple) and len(version) == 3
+        config["tensorrt_version"] = version
+    else:
+        linked_version = 
tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+        if not linked_version:
+            logging.warn("TVM was not built against TensorRT and no version 
was provided to partition_for_tensorrt. Defaulting to 6.0.1")
+            linked_version = (6, 0, 1)
+        config["tensorrt_version"] = linked_version
+
+    if params:
+        mod['main'] = bind_params_by_name(mod['main'], params)
+    seq = tvm.transform.Sequential([transform.InferType(),
+                                    RemoveDropoutPass(),
+                                    transform.RemoveUnusedFunctions(),
+                                    transform.ConvertLayout({'nn.conv2d': 
['NCHW', 'default'],
+                                                             'nn.conv3d': 
['NCDHW', 'default']}),
+                                    transform.FoldConstant(),
+                                    transform.AnnotateTarget('tensorrt'),
+                                    transform.MergeCompilerRegions(),
+                                    transform.PartitionGraph(),
+                                    transform.InferType()])
+    with tvm.transform.PassContext(opt_level=3, 
config={"relay.ext.tensorrt.options": config}):
+        mod = seq(mod)
+        mod = prune_tensorrt_subgraphs(mod)
+    return mod, config
+
+
+def _register_external_op_helper(op_name, supported=True):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            print("Only float32 inputs are supported for TensorRT.")
+            return False
+        return supported
+    return _func_wrapper
+
+
+def _register_external_op_helper_func(op_name, func):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            print("Only float32 inputs are supported for TensorRT.")
+            return False
+        return func(attrs, args, op_name)
+    return _func_wrapper
+
+
+# Ops which are always supported
+_register_external_op_helper("nn.relu")
+_register_external_op_helper("sigmoid")
+_register_external_op_helper("tanh")
+_register_external_op_helper("subtract")
+_register_external_op_helper("multiply")
+_register_external_op_helper("divide")
+_register_external_op_helper("power")
+_register_external_op_helper("maximum")
+_register_external_op_helper("minimum")
+_register_external_op_helper("exp")
+_register_external_op_helper("log")
+_register_external_op_helper("sqrt")
+_register_external_op_helper("abs")
+_register_external_op_helper("negative")
+_register_external_op_helper("nn.batch_flatten")
+_register_external_op_helper("clip")
+
[email protected]_op_attr("add", "target.tensorrt")
+def add_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not get_tensorrt_use_implicit_batch_mode() and \
+            (isinstance(args[0], Constant) or isinstance(args[1], Constant)) 
and \
+            args[0].checked_type.shape[0] == args[1].checked_type.shape[0] and 
\
+            args[0].checked_type.shape[0] != 1 and \
+            (len(args[0].checked_type.shape) > 3 or 
len(args[1].checked_type.shape) > 3):
+        print("add: bug in TRT with adding batched constants.")
+        return False
+    return True
+
[email protected]_op_attr("nn.batch_norm", "target.tensorrt")
+def batch_norm_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if int(attrs.axis) not in (1, 3):
+        print("nn.batch_norm: axis is {} but must be 1 or 
3.".format(int(attrs.axis)))
+        return False
+    return True
+
[email protected]_op_attr("nn.softmax", "target.tensorrt")
+def softmax_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axis) == 0:
+        print("nn.softmax: can't modify batch dimension.")
+        return False
+    return True
+
[email protected]_op_attr("nn.conv2d", "target.tensorrt")
+def conv2d_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.data_layout != "NCHW":
+        print("nn.conv2d: data_layout is {} but must be 
NCHW.".format(attrs.data_layout))
+        return False
+    if attrs.kernel_layout != "OIHW":
+        print("nn.conv2d: kernel_layout is {} but must be 
OIHW.".format(attrs.kernel_layout))
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCHW":
+        print("nn.conv2d: out_layout is {} but must be 
NCHW.".format(attrs.out_layout))
+        return False
+    return True
+
[email protected]_op_attr("nn.dense", "target.tensorrt")
+def dense_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    input_rank = len(args[0].checked_type.shape)
+    weight_rank = len(args[1].checked_type.shape)
+    if input_rank not in (2, 3, 4):
+        print("nn.dense: input has rank {} but must be 2, 3 or 
4.".format(input_rank))
+        return False
+    if weight_rank != 2:
+        print("nn.dense: weight has rank {} but must be 
2.".format(weight_rank))
+        return False
+    return True
+
[email protected]_op_attr("nn.bias_add", "target.tensorrt")
+def bias_add_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    input_rank = len(args[0].checked_type.shape)
+    if input_rank not in (2, 3, 4):
+        print("nn.bias_add: input rank is {} but must be 2, 3 or 
4.".format(input_rank))
+        return False
+    return True
+
[email protected]_op_attr("nn.max_pool2d", "target.tensorrt")
+def max_pool_2d_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        print("nn.max_pool2d: layout is {} but must be 
NCHW.".format(attrs.layout))
+        return False
+    if attrs.ceil_mode and get_tensorrt_version() < (5, 1, 5):
+        print("nn.avg_pool2d: ceil_mode=True requires TensorRT 5.1.5 or 
greater.")
+        return False
+    return True
+
[email protected]_op_attr("nn.avg_pool2d", "target.tensorrt")
+def avg_pool_2d_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        print("nn.avg_pool2d: layout is {} but must be 
NCHW.".format(attrs.layout))
+        return False
+    if attrs.count_include_pad and len(attrs.padding) == 4:
+        print("nn.avg_pool2d: inclusive-counted blended or average "
+                "pooling is not supported in combination with asymmetric 
padding")
+        return False
+    if attrs.ceil_mode and get_tensorrt_version() < (5, 1, 5):
+        print("nn.avg_pool2d: ceil_mode=True requires TensorRT 5.1.5 or 
greater.")
+        return False
+    return True
+
[email protected]_op_attr("nn.global_max_pool2d", "target.tensorrt")
+def global_max_pool_2d_annotate_fn(attrs, args): # pylint: 
disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        print("nn.global_max_pool2d: layout is {} but must be 
NCHW.".format(attrs.layout))
+        return False
+    return True
+
[email protected]_op_attr("nn.global_avg_pool2d", "target.tensorrt")
+def global_avg_pool_2d_annotate_fn(attrs, args): # pylint: 
disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        print("nn.global_avg_pool2d: layout is {} but must be 
NCHW.".format(attrs.layout))
+        return False
+    return True
+
[email protected]_op_attr("expand_dims", "target.tensorrt")
+def expand_dims_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axis) == 0:
+        print("expand_dims: can't modify batch dimension.")
+        return False
+    return True
+
[email protected]_op_attr("squeeze", "target.tensorrt")
+def squeeze_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not attrs.axis:
+        print("squeeze: must explicitly set axis.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([axis == 0 for axis in 
map(int, attrs.axis)]):
+        print("squeeze: can't modify batch dimension.")
+        return False
+    return True
+
[email protected]_op_attr("concatenate", "target.tensorrt")
+def concatenate_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.dtype != "float32" for x in args[0].checked_type.fields]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not get_tensorrt_use_implicit_batch_mode():
+        return True
+    if int(attrs.axis) == 0:
+        print("concatenate: can't modify batch dimension.")
+        return False
+    if isinstance(args[0], Tuple):
+        for tuple_input in args[0].fields:
+            if isinstance(tuple_input, Constant):
+                print("concatenate: can't concatenate tensors with constants.")
+                return False
+    return True
+
[email protected]_op_attr("nn.conv2d_transpose", "target.tensorrt")
+def conv2d_transpose_annotate_fn(attrs, args): # pylint: 
disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.data_layout != "NCHW":
+        print("nn.conv2d_transpose: data_layout is {} but must be 
NCHW.".format(
+            attrs.data_layout))
+        return False
+    if attrs.kernel_layout != "OIHW":
+        print("nn.conv2d_transpose: kernel_layout is {} but must be 
OIHW.".format(
+            attrs.kernel_layout))
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCHW":
+        print("nn.conv2d_transpose: out_layout is {} but must be NCHW.".format(
+            attrs.out_layout))
+        return False
+    if attrs.dilation and any([rate != 1 for rate in map(int, 
attrs.dilation)]):
+        print("nn.conv2d_transpose: dilation rate must be 1.")
+        return False
+    return True
+
[email protected]_op_attr("transpose", "target.tensorrt")
+def transpose_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axes[0]) != 0:
+        print("transpose: can't modify batch dimension.")
+        return False
+    return True
+
[email protected]_op_attr("layout_transform", "target.tensorrt")
+def resize_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if (attrs.src_layout, attrs.dst_layout) not in [("NCHW", "NHWC"), ("NHWC", 
"NCHW"), ("NDHWC", "NCDHW"), ("NCDHW", "NDHWC")]:
+        print("layout_transform: {} to {} is not 
supported.".format(attrs.src_layout, attrs.dst_layout))
+        return False
+    return True
+
[email protected]_op_attr("reshape", "target.tensorrt")
+def reshape_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if args[0].checked_type.dtype != "float32":
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if any([x < -1 for x in map(int, attrs.newshape)]):
+        print("reshape: new shape dims must be explicit.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode():
+        shape = list(map(int, args[0].checked_type.shape))
+        new_shape = list(map(int, attrs.newshape))
+        if len(new_shape) == 0 or len(shape) == 0:
+            print("reshape: Can't reshape to or from scalar.")
+            return False
+        # TRT cannot modify batch dimension.
+        original_volume = np.prod(shape)
+        # First, resolve 0.
+        for i, value in enumerate(new_shape):
+            if value == 0:
+                new_shape[i] = shape[i]
+        # Resolve -1.
+        for i, value in enumerate(new_shape):
+            if value == -1:
+                new_shape[i] = original_volume // np.prod([x for x in 
new_shape if x != -1])
+        if shape[0] != new_shape[0]:
+            print("reshape: can't modify batch dimension.")
+            return False
+    return True
+
[email protected]_op_attr("nn.pad", "target.tensorrt")
+def pad_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.pad_mode != "constant":
+        print("nn.pad: pad mode is {} but must be 
constant.".format(attrs.pad_mode))
+        return False
+    if float(attrs.pad_value) != 0.0:
+        print("nn.pad: pad value is {} but must be 
0.0.".format(float(attrs.pad_value)))
+        return False
+    if any([x != 0 for x in attrs.pad_width[0]]) or any([x != 0 for x in 
attrs.pad_width[1]]):
+        print("nn.pad: can't pad batch or channel dimensions.")
+        return False
+    if len(attrs.pad_width) == 5 and any([x != 0 for x in attrs.pad_width[2]]):
+        print("nn.pad: can only pad last two dimensions for 5D inputs.")
+    return True
+
+def reduce_annotate_fn(attrs, args, op_name):
+    if not attrs.axis or len(attrs.axis) == 0:
+        print("{}: cannot reduce to scalar.".format(op_name))
+        return False
+    if attrs.exclude:
+        print("{}: exclude not supported.".format(op_name))
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in 
map(int, attrs.axis)]):
+        print("{}: can't modify batch dimension.".format(op_name))
+        return False
+    return True
+
+_register_external_op_helper_func("sum", reduce_annotate_fn)
+_register_external_op_helper_func("prod", reduce_annotate_fn)
+_register_external_op_helper_func("max", reduce_annotate_fn)
+_register_external_op_helper_func("min", reduce_annotate_fn)
+_register_external_op_helper_func("mean", reduce_annotate_fn)
+
+def trt_5_1_5_annotate_fn(attrs, args, op_name):
+    if get_tensorrt_version() < (5, 1, 5):
+        print("{}: requires TensorRT version 5.1.5 or higher.".format(op_name))
+        return False
+    return True
+
+_register_external_op_helper_func("nn.leaky_relu", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("sin", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("cos", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("atan", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("ceil", trt_5_1_5_annotate_fn)
+
[email protected]_op_attr("strided_slice", "target.tensorrt")
+def strided_slice_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if args[0].checked_type.dtype != "float32":
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not trt_5_1_5_annotate_fn(attrs, args, "strided_slice"):
+        return False
+    if get_tensorrt_use_implicit_batch_mode():
+        batch_dim_begin_modified = attrs.begin[0] is not None and 
int(attrs.begin[0]) != 0
+        batch_dim_end_modified = attrs.end[0] is not None and 
int(attrs.end[0]) != -1 and \
+                                    int(attrs.end[0]) != 
int(args[0].checked_type.shape[0])
+        if batch_dim_begin_modified or batch_dim_end_modified:
+            print("strided_slice: can't modify batch dimension.")
+            return False
+    if any([x is not None and x <= 0 for x in attrs.strides]):
+        print("strided_slice: stride must be positive")
+        return False
+    return True
+
[email protected]_op_attr("nn.adaptive_max_pool2d", "target.tensorrt")
+def adapative_max_pool2d_annotate_fn(attrs, args): # pylint: 
disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if len(attrs.output_size) == 0 or any([size != 1 for size in map(int, 
attrs.output_size)]):
+        print("nn.adaptive_max_pool2d: output size must be (1, 1).")
+        return False
+    return True
+
[email protected]_op_attr("nn.adaptive_avg_pool2d", "target.tensorrt")
+def adapative_avg_pool2d_annotate_fn(attrs, args): # pylint: 
disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if len(attrs.output_size) == 0 or any([size != 1 for size in map(int, 
attrs.output_size)]):
+        print("nn.adaptive_avg_pool2d: output size must be (1, 1).")
+        return False
+    return True
+
[email protected]_op_attr("nn.upsampling", "target.tensorrt")
+def upsampling_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    # TODO(trevmorr): Output does not match TVM. Disable.
+    return False
+
[email protected]_op_attr("nn.conv3d", "target.tensorrt")
+def conv3d_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_version() < (6, 0, 1):
+        print("nn.conv3d: requires TensorRT version 6.0.1 or higher.")
+        return False
+    if attrs.data_layout != "NCDHW":
+        print("nn.conv3d: data_layout is {} but must be 
NCDHW.".format(attrs.data_layout))
+        return False
+    if attrs.kernel_layout != "OIDHW":
+        print("nn.conv3d: kernel_layout is {} but must be 
OIDHW.".format(attrs.kernel_layout))
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCDHW":
+        print("nn.conv3d: out_layout is {} but must be 
NCDHW.".format(attrs.out_layout))
+        return False
+    return True
+
[email protected]_op_attr("nn.max_pool3d", "target.tensorrt")
+def max_pool_3d_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_version() < (6, 0, 1):
+        print("nn.max_pool3d: requires TensorRT version 6.0.1 or higher.")
+        return False
+    if attrs.layout != "NCDHW":
+        print("nn.max_pool3d: layout is {} but must be 
NCDHW.".format(attrs.layout))
+        return False
+    return True
+
[email protected]_op_attr("nn.avg_pool3d", "target.tensorrt")
+def avg_pool_3d_annotate_fn(attrs, args): # pylint: disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_version() < (6, 0, 1):
+        print("nn.avg_pool3d: requires TensorRT version 6.0.1 or higher.")
+        return False
+    if attrs.layout != "NCDHW":
+        print("nn.avg_pool3d: layout is {} but must be 
NCDHW.".format(attrs.layout))
+        return False
+    return True
+
[email protected]_op_attr("nn.conv3d_transpose", "target.tensorrt")
+def conv3d_transpose_annotate_fn(attrs, args): # pylint: 
disable=unused-variable
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        print("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_version() < (6, 0, 1):
+        print("nn.conv3d_transpose: requires TensorRT version 6.0.1 or 
higher.")
+        return False
+    if attrs.data_layout != "NCDHW":
+        print("nn.conv3d_transpose: data_layout is {} but must be 
NCDHW.".format(
+            attrs.data_layout))
+        return False
+    if attrs.kernel_layout != "OIDHW":
+        print("nn.conv3d_transpose: kernel_layout is {} but must be 
OIDHW.".format(
+            attrs.kernel_layout))
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCDHW":
+        print("nn.conv3d_transpose: out_layout is {} but must be 
NCDHW.".format(
+            attrs.out_layout))
+        return False
+    if attrs.dilation and any([rate != 1 for rate in map(int, 
attrs.dilation)]):
+        print("nn.conv3d_transpose: dilation rate must be 1.")
+        return False
+    if attrs.output_padding and any([x != 0 for x in map(int, 
attrs.output_padding)]):
+        print("nn.conv3d_transpose: output padding is not supported.")
+        return False
+    return True
+
+def is_invalid_subgraph(params, body):
+    # Remove invalid subgraphs for implicit batch mode.
+    if get_tensorrt_use_implicit_batch_mode():
+        input_batch_sizes = []
+        for var in params:
+            # In implicit batch mode, all inputs must have same batch size
+            if isinstance(var.checked_type, relay.TupleType):
+                for tupe_type in var.checked_type.fields:
+                    # Scalar inputs not allowed
+                    if len(tupe_type.shape) == 0:
+                        print('tensorrt: scalar inputs not supported')
+                        return True
+                    input_batch_sizes.append(int(tupe_type.shape[0]))
+            else:
+                # Scalar inputs not allowed
+                if len(var.checked_type.shape) == 0:
+                    print('tensorrt: scalar inputs not supported')
+                    return True
+                input_batch_sizes.append(int(var.checked_type.shape[0]))
+        if len(input_batch_sizes) > 1 and \
+           any([x != input_batch_sizes[0] for x in input_batch_sizes[1:]]):
+            print('tensorrt: inputs have different batch sizes')
+            return True
+    # Remove subgraphs with no multiply-accumulates
+    if get_tensorrt_remove_no_mac_subgraphs() and 
relay.analysis.get_total_mac_number(body) == 0:
+        return True
+    return False
+
+def prune_tensorrt_subgraphs(mod, target="tensorrt"):
+    class VarReplacer(ExprMutator):
+        """
+        Visit an expression while replacing vars according to var_map. Used by
+        SubgraphRemover/PruneSubgraphs to return a subgraph originally 
partitioned to TRT back to TVM.
+        """
+        def __init__(self, var_map):
+            ExprMutator.__init__(self)
+            self.var_map = var_map
+
+        def visit_var(self, var):
+            if var in self.var_map:
+                return self.var_map[var]
+            return super().visit_var(var)
+
+    class SubgraphRemover(ExprMutator):

Review comment:
       This is the issue we discussed in this PR about how to deal with 
post-partitioning judgements. We could figure out an approach later on to 
generalize this requirement.

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]

Review comment:
       ```suggestion
       ret: Tuple[int, int, int]
   ```

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]

Review comment:
       ```suggestion
       version : Optional[Tuple[int, int, int]]
   ```

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.

Review comment:
       Should be fine in one line.

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs

Review comment:
       Do we need a guard here as in L52?

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]
+        TensorRT version to target as tuple of (major, minor, patch). If TVM 
is compiled with
+        USE_TENSORRT_GRAPH_RUNTIME=ON, the linked TensorRT version will be 
used instead.
+    use_implicit_batch : Optional[bool]
+        Use TensorRT implicit batch mode (default true). Setting to false will 
enable explicit batch
+        mode which will widen supported operators to include those which 
modify the batch dimension,
+        but may reduce performance for some models.
+    remove_no_mac_subgraphs : Optional[bool]
+        Removes subgraphs which have been partitioned for TensorRT if they do 
not have any
+        multiply-accumulate operations. The removed subgraphs will go through 
TVM's standard
+        compilation instead. Can improve performance.
+    max_workspace_size : Optional[int]
+        How many bytes of workspace size to allow each subgraph to use for 
TensorRT engine creation.
+        See TensorRT documentation for more info.
+    Returns
+    -------
+    mod : annotated and partitioned module.
+    config : "relay.ext.tensorrt.options" configuration which should be given 
to PassContext when
+             building.
+    """
+    config = {
+        "use_implicit_batch": use_implicit_batch,
+        "max_workspace_size": max_workspace_size,
+        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
+    }
+    if version:
+        assert isinstance(version, tuple) and len(version) == 3
+        config["tensorrt_version"] = version
+    else:
+        linked_version = 
tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+        if not linked_version:
+            logging.warning(
+                "TVM was not built against TensorRT and no version was 
provided to "
+                "partition_for_tensorrt. Defaulting to 6.0.1"
+            )
+            linked_version = (6, 0, 1)
+        config["tensorrt_version"] = linked_version
+
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            RemoveDropoutPass(),
+            transform.RemoveUnusedFunctions(),
+            transform.ConvertLayout(
+                {"nn.conv2d": ["NCHW", "default"], "nn.conv3d": ["NCDHW", 
"default"]}
+            ),
+            transform.FoldConstant(),
+            transform.AnnotateTarget("tensorrt"),
+            transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+            transform.InferType(),
+        ]
+    )
+    with tvm.transform.PassContext(opt_level=3, 
config={"relay.ext.tensorrt.options": config}):
+        mod = seq(mod)
+        mod = prune_tensorrt_subgraphs(mod)
+    return mod, config
+
+
+def _register_external_op_helper(op_name, supported=True):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return supported
+
+    return _func_wrapper
+
+
+def _register_external_op_helper_func(op_name, func):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return func(attrs, args, op_name)
+
+    return _func_wrapper

Review comment:
       These two functions share the same logic so it would be better to merge 
them:
   
   ```python
   def _register_external_op_helper_with_checker(op_name, checker):
       # The body of _register_external_op_helper_func
   
   def _register_external_op_helper(op_name, supported=True):
       return _register_external_op_helper_with_checker(op_name, lambda attrs, 
args, op_name: supported)
   ```

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]
+        TensorRT version to target as tuple of (major, minor, patch). If TVM 
is compiled with
+        USE_TENSORRT_GRAPH_RUNTIME=ON, the linked TensorRT version will be 
used instead.
+    use_implicit_batch : Optional[bool]
+        Use TensorRT implicit batch mode (default true). Setting to false will 
enable explicit batch
+        mode which will widen supported operators to include those which 
modify the batch dimension,
+        but may reduce performance for some models.
+    remove_no_mac_subgraphs : Optional[bool]
+        Removes subgraphs which have been partitioned for TensorRT if they do 
not have any
+        multiply-accumulate operations. The removed subgraphs will go through 
TVM's standard
+        compilation instead. Can improve performance.
+    max_workspace_size : Optional[int]
+        How many bytes of workspace size to allow each subgraph to use for 
TensorRT engine creation.
+        See TensorRT documentation for more info.
+    Returns
+    -------
+    mod : annotated and partitioned module.
+    config : "relay.ext.tensorrt.options" configuration which should be given 
to PassContext when
+             building.

Review comment:
       ```suggestion
       mod_n_config: Tuple[Module, Dict[str, Any]]
           A tuple of 1) annotated and partitioned module and 2) 
"relay.ext.tensorrt.options" configuration
           which should be given to PassContext when building.
   ```

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]
+        TensorRT version to target as tuple of (major, minor, patch). If TVM 
is compiled with
+        USE_TENSORRT_GRAPH_RUNTIME=ON, the linked TensorRT version will be 
used instead.
+    use_implicit_batch : Optional[bool]
+        Use TensorRT implicit batch mode (default true). Setting to false will 
enable explicit batch
+        mode which will widen supported operators to include those which 
modify the batch dimension,
+        but may reduce performance for some models.
+    remove_no_mac_subgraphs : Optional[bool]
+        Removes subgraphs which have been partitioned for TensorRT if they do 
not have any
+        multiply-accumulate operations. The removed subgraphs will go through 
TVM's standard
+        compilation instead. Can improve performance.
+    max_workspace_size : Optional[int]
+        How many bytes of workspace size to allow each subgraph to use for 
TensorRT engine creation.
+        See TensorRT documentation for more info.
+    Returns
+    -------
+    mod : annotated and partitioned module.
+    config : "relay.ext.tensorrt.options" configuration which should be given 
to PassContext when
+             building.
+    """
+    config = {
+        "use_implicit_batch": use_implicit_batch,
+        "max_workspace_size": max_workspace_size,
+        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
+    }
+    if version:
+        assert isinstance(version, tuple) and len(version) == 3
+        config["tensorrt_version"] = version
+    else:
+        linked_version = 
tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+        if not linked_version:
+            logging.warning(
+                "TVM was not built against TensorRT and no version was 
provided to "
+                "partition_for_tensorrt. Defaulting to 6.0.1"
+            )
+            linked_version = (6, 0, 1)
+        config["tensorrt_version"] = linked_version
+
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            RemoveDropoutPass(),
+            transform.RemoveUnusedFunctions(),
+            transform.ConvertLayout(
+                {"nn.conv2d": ["NCHW", "default"], "nn.conv3d": ["NCDHW", 
"default"]}
+            ),
+            transform.FoldConstant(),
+            transform.AnnotateTarget("tensorrt"),
+            transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+            transform.InferType(),
+        ]
+    )
+    with tvm.transform.PassContext(opt_level=3, 
config={"relay.ext.tensorrt.options": config}):
+        mod = seq(mod)
+        mod = prune_tensorrt_subgraphs(mod)
+    return mod, config
+
+
+def _register_external_op_helper(op_name, supported=True):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return supported
+
+    return _func_wrapper
+
+
+def _register_external_op_helper_func(op_name, func):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return func(attrs, args, op_name)
+
+    return _func_wrapper
+
+
+# Ops which are always supported
+_register_external_op_helper("nn.relu")
+_register_external_op_helper("sigmoid")
+_register_external_op_helper("tanh")
+_register_external_op_helper("subtract")
+_register_external_op_helper("multiply")
+_register_external_op_helper("divide")
+_register_external_op_helper("power")
+_register_external_op_helper("maximum")
+_register_external_op_helper("minimum")
+_register_external_op_helper("exp")
+_register_external_op_helper("log")
+_register_external_op_helper("sqrt")
+_register_external_op_helper("abs")
+_register_external_op_helper("negative")
+_register_external_op_helper("nn.batch_flatten")
+_register_external_op_helper("clip")
+
+
[email protected]_op_attr("add", "target.tensorrt")
+def add_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if add is supported by TensorRT."""
+
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")

Review comment:
       - Use the standard logger: `logger = logging.getLogger("TensorRT")`.
   - "info" seems not a right log level for messages like this. Should be 
"warn" or "debug".
   - Ditto to other logs.

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================
+**Author**: `Trevor Morris <https://github.com/trevor-m>`_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This 
integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance 
boost on NVIDIA GPUs
+without the need to tune schedules.
+
+Installing TensorRT
+------------------------------
+
+In order to download TensorRT, you will need to create an NVIDIA Developer 
program account. Please
+see NVIDIA's documentation for more info:
+https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html. If you 
have a Jetson device
+such as a TX1, TX2, Xavier, or Nano, TensorRT will already be installed on the 
device via the
+JetPack SDK.
+
+There are two methods to install TensorRT:
+
+* System install via deb or rpm package.
+* Tar file installation.
+
+With the tar file installation method, you must provide the path of the 
extracted tar archive to
+USE_TENSORRT_GRAPH_RUNTIME=/path/to/TensorRT. With the system install method,
+USE_TENSORRT_GRAPH_RUNTIME=ON will automatically locate your installation.
+
+Building TVM with TensorRT support
+----------------------------------
+
+There are two separate build flags for TensorRT integration in TVM:
+
+* USE_TENSORRT=ON/OFF - This flag will enable compiling a TensorRT module, 
which does not require any
+TensorRT library.
+* USE_TENSORRT_GRAPH_RUNTIME=ON/OFF/path-to-TensorRT - This flag will enable 
the TensorRT runtime
+module. This will build TVM against the TensorRT libraries.
+
+Example setting in config.cmake file:
+
+.. code:: cmake
+
+    set(USE_TENSORRT ON)
+    set(USE_TENSORRT_GRAPH_RUNTIME /home/ubuntu/TensorRT-7.0.0.11)
+
+
+Usage
+-----
+
+.. note::
+
+    This section may not stay up-to-date with changes to the API.
+
+Create a relay graph from a MXNet ResNet18 model.

Review comment:
       s/relay/Relay/g

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================

Review comment:
       ```suggestion
   ==========================
   ```

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================
+**Author**: `Trevor Morris <https://github.com/trevor-m>`_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This 
integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance 
boost on NVIDIA GPUs
+without the need to tune schedules.

Review comment:
       Better to provide an overview of this tutorial, including what 
information/takeaway readers could expect after reading this article.
   
   After reading through, I would suggest mentioning the following points:
   1. (optional) Some model performance numbers to impress readers.
   2. Install TensorRT and build TVM with it.
   3. Build a ResNet-18 with TensorRT.
   4. Operator support (I also suggest moving this section to the end along 
with the last section about adding new operators.)
   5. Deploy the built model with TensorRT runtime.
   6. How to support other operators.

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================
+**Author**: `Trevor Morris <https://github.com/trevor-m>`_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This 
integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance 
boost on NVIDIA GPUs
+without the need to tune schedules.
+
+Installing TensorRT
+------------------------------

Review comment:
       ```suggestion
   -------------------
   ```
   Note that misaligning will cause doc generation warnings and fail the CI.

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================
+**Author**: `Trevor Morris <https://github.com/trevor-m>`_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This 
integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance 
boost on NVIDIA GPUs
+without the need to tune schedules.
+
+Installing TensorRT
+------------------------------
+
+In order to download TensorRT, you will need to create an NVIDIA Developer 
program account. Please
+see NVIDIA's documentation for more info:
+https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html. If you 
have a Jetson device
+such as a TX1, TX2, Xavier, or Nano, TensorRT will already be installed on the 
device via the
+JetPack SDK.
+
+There are two methods to install TensorRT:
+
+* System install via deb or rpm package.
+* Tar file installation.
+
+With the tar file installation method, you must provide the path of the 
extracted tar archive to
+USE_TENSORRT_GRAPH_RUNTIME=/path/to/TensorRT. With the system install method,
+USE_TENSORRT_GRAPH_RUNTIME=ON will automatically locate your installation.
+
+Building TVM with TensorRT support
+----------------------------------
+
+There are two separate build flags for TensorRT integration in TVM:
+
+* USE_TENSORRT=ON/OFF - This flag will enable compiling a TensorRT module, 
which does not require any
+TensorRT library.
+* USE_TENSORRT_GRAPH_RUNTIME=ON/OFF/path-to-TensorRT - This flag will enable 
the TensorRT runtime
+module. This will build TVM against the TensorRT libraries.
+
+Example setting in config.cmake file:
+
+.. code:: cmake
+
+    set(USE_TENSORRT ON)
+    set(USE_TENSORRT_GRAPH_RUNTIME /home/ubuntu/TensorRT-7.0.0.11)
+
+
+Usage
+-----
+
+.. note::
+
+    This section may not stay up-to-date with changes to the API.

Review comment:
       Better to say this section is tested with TensorRT version XXX and may 
not stay up-to-date with newer versions that change the APIs.

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]
+        TensorRT version to target as tuple of (major, minor, patch). If TVM 
is compiled with
+        USE_TENSORRT_GRAPH_RUNTIME=ON, the linked TensorRT version will be 
used instead.
+    use_implicit_batch : Optional[bool]
+        Use TensorRT implicit batch mode (default true). Setting to false will 
enable explicit batch
+        mode which will widen supported operators to include those which 
modify the batch dimension,
+        but may reduce performance for some models.
+    remove_no_mac_subgraphs : Optional[bool]
+        Removes subgraphs which have been partitioned for TensorRT if they do 
not have any
+        multiply-accumulate operations. The removed subgraphs will go through 
TVM's standard
+        compilation instead. Can improve performance.
+    max_workspace_size : Optional[int]
+        How many bytes of workspace size to allow each subgraph to use for 
TensorRT engine creation.
+        See TensorRT documentation for more info.
+    Returns
+    -------
+    mod : annotated and partitioned module.
+    config : "relay.ext.tensorrt.options" configuration which should be given 
to PassContext when
+             building.
+    """
+    config = {
+        "use_implicit_batch": use_implicit_batch,
+        "max_workspace_size": max_workspace_size,
+        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
+    }
+    if version:
+        assert isinstance(version, tuple) and len(version) == 3
+        config["tensorrt_version"] = version
+    else:
+        linked_version = 
tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+        if not linked_version:
+            logging.warning(
+                "TVM was not built against TensorRT and no version was 
provided to "
+                "partition_for_tensorrt. Defaulting to 6.0.1"
+            )
+            linked_version = (6, 0, 1)
+        config["tensorrt_version"] = linked_version
+
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            RemoveDropoutPass(),
+            transform.RemoveUnusedFunctions(),
+            transform.ConvertLayout(
+                {"nn.conv2d": ["NCHW", "default"], "nn.conv3d": ["NCDHW", 
"default"]}
+            ),
+            transform.FoldConstant(),
+            transform.AnnotateTarget("tensorrt"),
+            transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+            transform.InferType(),
+        ]
+    )
+    with tvm.transform.PassContext(opt_level=3, 
config={"relay.ext.tensorrt.options": config}):
+        mod = seq(mod)
+        mod = prune_tensorrt_subgraphs(mod)
+    return mod, config
+
+
+def _register_external_op_helper(op_name, supported=True):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return supported
+
+    return _func_wrapper
+
+
+def _register_external_op_helper_func(op_name, func):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return func(attrs, args, op_name)
+
+    return _func_wrapper
+
+
+# Ops which are always supported
+_register_external_op_helper("nn.relu")
+_register_external_op_helper("sigmoid")
+_register_external_op_helper("tanh")
+_register_external_op_helper("subtract")
+_register_external_op_helper("multiply")
+_register_external_op_helper("divide")
+_register_external_op_helper("power")
+_register_external_op_helper("maximum")
+_register_external_op_helper("minimum")
+_register_external_op_helper("exp")
+_register_external_op_helper("log")
+_register_external_op_helper("sqrt")
+_register_external_op_helper("abs")
+_register_external_op_helper("negative")
+_register_external_op_helper("nn.batch_flatten")
+_register_external_op_helper("clip")
+
+
[email protected]_op_attr("add", "target.tensorrt")
+def add_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if add is supported by TensorRT."""
+
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if (
+        not get_tensorrt_use_implicit_batch_mode()
+        and (isinstance(args[0], Constant) or isinstance(args[1], Constant))
+        and args[0].checked_type.shape[0] == args[1].checked_type.shape[0]
+        and args[0].checked_type.shape[0] != 1
+        and (len(args[0].checked_type.shape) > 3 or 
len(args[1].checked_type.shape) > 3)
+    ):
+        logging.info("add: bug in TRT with adding batched constants.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.batch_norm", "target.tensorrt")
+def batch_norm_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.batch_norm is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if int(attrs.axis) not in (1, 3):
+        logging.info("nn.batch_norm: axis is %d but must be 1 or 3.", 
int(attrs.axis))
+        return False
+    return True
+
+
[email protected]_op_attr("nn.softmax", "target.tensorrt")
+def softmax_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.softmax is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axis) == 0:
+        logging.info("nn.softmax: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.conv2d", "target.tensorrt")
+def conv2d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.conv2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.data_layout != "NCHW":
+        logging.info("nn.conv2d: data_layout is %s but must be NCHW.", 
attrs.data_layout)
+        return False
+    if attrs.kernel_layout != "OIHW":
+        logging.info("nn.conv2d: kernel_layout is %s but must be OIHW.", 
attrs.kernel_layout)
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCHW":
+        logging.info("nn.conv2d: out_layout is %s but must be NCHW.", 
attrs.out_layout)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.dense", "target.tensorrt")
+def dense_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if dense is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    input_rank = len(args[0].checked_type.shape)
+    weight_rank = len(args[1].checked_type.shape)
+    if input_rank not in (2, 3, 4):
+        logging.info("nn.dense: input has rank %d but must be 2, 3 or 4.", 
input_rank)
+        return False
+    if weight_rank != 2:
+        logging.info("nn.dense: weight has rank %d but must be 2.", 
weight_rank)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.bias_add", "target.tensorrt")
+def bias_add_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.bias_add is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    input_rank = len(args[0].checked_type.shape)
+    if input_rank not in (2, 3, 4):
+        logging.info("nn.bias_add: input rank is %d but must be 2, 3 or 4.", 
input_rank)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.max_pool2d", "target.tensorrt")
+def max_pool_2d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.max_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.max_pool2d: layout is %s but must be NCHW.", 
attrs.layout)
+        return False
+    if attrs.ceil_mode and get_tensorrt_version() < (5, 1, 5):
+        logging.info("nn.avg_pool2d: ceil_mode=True requires TensorRT 5.1.5 or 
greater.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.avg_pool2d", "target.tensorrt")
+def avg_pool_2d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.avg_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.avg_pool2d: layout is %d but must be NCHW.", 
attrs.layout)
+        return False
+    if attrs.count_include_pad and len(attrs.padding) == 4:
+        logging.info(
+            "nn.avg_pool2d: inclusive-counted blended or average "
+            "pooling is not supported in combination with asymmetric padding"
+        )
+        return False
+    if attrs.ceil_mode and get_tensorrt_version() < (5, 1, 5):
+        logging.info("nn.avg_pool2d: ceil_mode=True requires TensorRT 5.1.5 or 
greater.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.global_max_pool2d", "target.tensorrt")
+def global_max_pool_2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.global_max_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.global_max_pool2d: layout is %s but must be NCHW.", 
attrs.layout)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.global_avg_pool2d", "target.tensorrt")
+def global_avg_pool_2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.global_avg_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.global_avg_pool2d: layout is %s but must be NCHW.", 
attrs.layout)
+        return False
+    return True
+
+
[email protected]_op_attr("expand_dims", "target.tensorrt")
+def expand_dims_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if expand_dims is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axis) == 0:
+        logging.info("expand_dims: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("squeeze", "target.tensorrt")
+def squeeze_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if squeeze is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not attrs.axis:
+        logging.info("squeeze: must explicitly set axis.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([axis == 0 for axis in 
map(int, attrs.axis)]):
+        logging.info("squeeze: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("concatenate", "target.tensorrt")
+def concatenate_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if concatenate is supported by TensorRT."""
+    if any([x.dtype != "float32" for x in args[0].checked_type.fields]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not get_tensorrt_use_implicit_batch_mode():
+        return True
+    if int(attrs.axis) == 0:
+        logging.info("concatenate: can't modify batch dimension.")
+        return False
+    if isinstance(args[0], Tuple):
+        for tuple_input in args[0].fields:
+            if isinstance(tuple_input, Constant):
+                logging.info("concatenate: can't concatenate tensors with 
constants.")
+                return False
+    return True
+
+
[email protected]_op_attr("nn.conv2d_transpose", "target.tensorrt")
+def conv2d_transpose_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.conv2d_transpose is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.data_layout != "NCHW":
+        logging.info("nn.conv2d_transpose: data_layout is %s but must be 
NCHW.", attrs.data_layout)
+        return False
+    if attrs.kernel_layout != "OIHW":
+        logging.info(
+            "nn.conv2d_transpose: kernel_layout is %s but must be OIHW.", 
attrs.kernel_layout
+        )
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCHW":
+        logging.info("nn.conv2d_transpose: out_layout is %s but must be 
NCHW.", attrs.out_layout)
+        return False
+    if attrs.dilation and any([rate != 1 for rate in map(int, 
attrs.dilation)]):
+        logging.info("nn.conv2d_transpose: dilation rate must be 1.")
+        return False
+    return True
+
+
[email protected]_op_attr("transpose", "target.tensorrt")
+def transpose_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if transpose is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axes[0]) != 0:
+        logging.info("transpose: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("layout_transform", "target.tensorrt")
+def layout_transform_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if layout_transform is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if (attrs.src_layout, attrs.dst_layout) not in [
+        ("NCHW", "NHWC"),
+        ("NHWC", "NCHW"),
+        ("NDHWC", "NCDHW"),
+        ("NCDHW", "NDHWC"),
+    ]:
+        logging.info(
+            "layout_transform: %s to %s is not supported.", attrs.src_layout, 
attrs.dst_layout
+        )
+        return False
+    return True
+
+
[email protected]_op_attr("reshape", "target.tensorrt")
+def reshape_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if reshape is supported by TensorRT."""
+    if args[0].checked_type.dtype != "float32":
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if any([x < -1 for x in map(int, attrs.newshape)]):
+        logging.info("reshape: new shape dims must be explicit.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode():
+        shape = list(map(int, args[0].checked_type.shape))
+        new_shape = list(map(int, attrs.newshape))
+        if len(new_shape) == 0 or len(shape) == 0:
+            logging.info("reshape: Can't reshape to or from scalar.")
+            return False
+        # TRT cannot modify batch dimension.
+        original_volume = np.prod(shape)
+        # First, resolve 0.
+        for i, value in enumerate(new_shape):
+            if value == 0:
+                new_shape[i] = shape[i]
+        # Resolve -1.
+        for i, value in enumerate(new_shape):
+            if value == -1:
+                new_shape[i] = original_volume // np.prod([x for x in 
new_shape if x != -1])
+        if shape[0] != new_shape[0]:
+            logging.info("reshape: can't modify batch dimension.")
+            return False
+    return True
+
+
[email protected]_op_attr("nn.pad", "target.tensorrt")
+def pad_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.pad is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.pad_mode != "constant":
+        logging.info("nn.pad: pad mode is %s but must be constant.", 
attrs.pad_mode)
+        return False
+    if float(attrs.pad_value) != 0.0:
+        logging.info("nn.pad: pad value is %f but must be 0.0.", 
float(attrs.pad_value))
+        return False
+    if any([x != 0 for x in attrs.pad_width[0]]) or any([x != 0 for x in 
attrs.pad_width[1]]):
+        logging.info("nn.pad: can't pad batch or channel dimensions.")
+        return False
+    if len(attrs.pad_width) == 5 and any([x != 0 for x in attrs.pad_width[2]]):
+        logging.info("nn.pad: can only pad last two dimensions for 5D inputs.")
+    return True
+
+
+def reduce_annotate_fn(attrs, args, op_name):
+    """Helper for reduce operations."""
+    if not attrs.axis or len(attrs.axis) == 0:
+        logging.info("%s: cannot reduce to scalar.", op_name)
+        return False
+    if attrs.exclude:
+        logging.info("%s: exclude not supported.", op_name)
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in 
map(int, attrs.axis)]):
+        logging.info("%s: can't modify batch dimension.", op_name)
+        return False
+    return True
+
+
+_register_external_op_helper_func("sum", reduce_annotate_fn)
+_register_external_op_helper_func("prod", reduce_annotate_fn)
+_register_external_op_helper_func("max", reduce_annotate_fn)
+_register_external_op_helper_func("min", reduce_annotate_fn)
+_register_external_op_helper_func("mean", reduce_annotate_fn)
+
+
+def trt_5_1_5_annotate_fn(attrs, args, op_name):
+    """Helper for ops which require TRT 5.1.5 or greater."""
+    if get_tensorrt_version() < (5, 1, 5):
+        logging.info("%s: requires TensorRT version 5.1.5 or higher.", op_name)
+        return False
+    return True
+
+
+_register_external_op_helper_func("nn.leaky_relu", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("sin", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("cos", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("atan", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("ceil", trt_5_1_5_annotate_fn)
+
+
[email protected]_op_attr("strided_slice", "target.tensorrt")
+def strided_slice_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if strided_slice is supported by TensorRT."""
+    if args[0].checked_type.dtype != "float32":
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not trt_5_1_5_annotate_fn(attrs, args, "strided_slice"):
+        return False
+    if get_tensorrt_use_implicit_batch_mode():
+        batch_dim_begin_modified = attrs.begin[0] is not None and 
int(attrs.begin[0]) != 0
+        batch_dim_end_modified = (
+            attrs.end[0] is not None
+            and int(attrs.end[0]) != -1
+            and int(attrs.end[0]) != int(args[0].checked_type.shape[0])
+        )
+        if batch_dim_begin_modified or batch_dim_end_modified:
+            logging.info("strided_slice: can't modify batch dimension.")
+            return False
+    if any([x is not None and x <= 0 for x in attrs.strides]):
+        logging.info("strided_slice: stride must be positive")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.adaptive_max_pool2d", "target.tensorrt")
+def adapative_max_pool2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.adaptive_max_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if len(attrs.output_size) == 0 or any([size != 1 for size in map(int, 
attrs.output_size)]):
+        logging.info("nn.adaptive_max_pool2d: output size must be (1, 1).")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.adaptive_avg_pool2d", "target.tensorrt")
+def adapative_avg_pool2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.adaptive_avg_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if len(attrs.output_size) == 0 or any([size != 1 for size in map(int, 
attrs.output_size)]):
+        logging.info("nn.adaptive_avg_pool2d: output size must be (1, 1).")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.conv3d", "target.tensorrt")
+def conv3d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.conv3d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_version() < (6, 0, 1):

Review comment:
       It might be better to generalize `trt_5_1_5_annotate_fn` to 
`trt_version_annotate_fn(version, attrs, args, op_name)`.

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================
+**Author**: `Trevor Morris <https://github.com/trevor-m>`_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This 
integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance 
boost on NVIDIA GPUs
+without the need to tune schedules.
+
+Installing TensorRT
+------------------------------
+
+In order to download TensorRT, you will need to create an NVIDIA Developer 
program account. Please
+see NVIDIA's documentation for more info:
+https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html. If you 
have a Jetson device
+such as a TX1, TX2, Xavier, or Nano, TensorRT will already be installed on the 
device via the
+JetPack SDK.
+
+There are two methods to install TensorRT:
+
+* System install via deb or rpm package.
+* Tar file installation.
+
+With the tar file installation method, you must provide the path of the 
extracted tar archive to
+USE_TENSORRT_GRAPH_RUNTIME=/path/to/TensorRT. With the system install method,
+USE_TENSORRT_GRAPH_RUNTIME=ON will automatically locate your installation.
+
+Building TVM with TensorRT support
+----------------------------------
+
+There are two separate build flags for TensorRT integration in TVM:
+
+* USE_TENSORRT=ON/OFF - This flag will enable compiling a TensorRT module, which
+  does not require any TensorRT library.
+* USE_TENSORRT_GRAPH_RUNTIME=ON/OFF/path-to-TensorRT - This flag will enable the
+  TensorRT runtime module. This will build TVM against the TensorRT libraries.
+
+Example setting in config.cmake file:
+
+.. code:: cmake
+
+    set(USE_TENSORRT ON)
+    set(USE_TENSORRT_GRAPH_RUNTIME /home/ubuntu/TensorRT-7.0.0.11)
+
+
+Build and Deploy ResNet-18 with TensorRT
+----------------------------------------
+
+.. note::
+
+    This section may not stay up-to-date with changes to the API.
+
+Create a relay graph from a MXNet ResNet18 model.
+
+.. code:: python
+
+    import tvm
+    from tvm import relay
+    import mxnet
+    from mxnet.gluon.model_zoo.vision import get_model
+
+    dtype = "float32"
+    input_shape = (1, 3, 224, 224)
+    block = get_model('resnet18_v1', pretrained=True)
+    mod, params = relay.frontend.from_mxnet(block, shape={'data': 
input_shape}, dtype=dtype)
+
+
+Annotate and partition the graph for TensorRT. All ops which are supported by 
the TensorRT
+integration will be marked and offloaded to TensorRT. The rest of the ops will 
go through the
+regular TVM CUDA compilation and code generation.
+
+.. code:: python
+
+    from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
+    mod, config = partition_for_tensorrt(mod, params)
+
+
+Build the Relay graph, using the new module and config returned by 
partition_for_tensorrt. The
+target must be a CUDA target, e.g. "cuda" or "cuda -model=v100".

Review comment:
       This statement is not 100% correct. It can also be "cuda -model=v100" 
for example.

##########
File path: docs/deploy/tensorrt.rst
##########
@@ -0,0 +1,267 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay TensorRT Integration
+==============================================
+**Author**: `Trevor Morris <https://github.com/trevor-m>`_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This 
integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance 
boost on NVIDIA GPUs
+without the need to tune schedules.
+
+Installing TensorRT
+------------------------------
+
+In order to download TensorRT, you will need to create an NVIDIA Developer 
program account. Please
+see NVIDIA's documentation for more info:
+https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html. If you 
have a Jetson device
+such as a TX1, TX2, Xavier, or Nano, TensorRT will already be installed on the 
device via the
+JetPack SDK.
+
+There are two methods to install TensorRT:
+
+* System install via deb or rpm package.
+* Tar file installation.
+
+With the tar file installation method, you must provide the path of the 
extracted tar archive to
+USE_TENSORRT_GRAPH_RUNTIME=/path/to/TensorRT. With the system install method,
+USE_TENSORRT_GRAPH_RUNTIME=ON will automatically locate your installation.
+
+Building TVM with TensorRT support
+----------------------------------
+
+There are two separate build flags for TensorRT integration in TVM:
+
+* USE_TENSORRT=ON/OFF - This flag will enable compiling a TensorRT module, which
+  does not require any TensorRT library.
+* USE_TENSORRT_GRAPH_RUNTIME=ON/OFF/path-to-TensorRT - This flag will enable the
+  TensorRT runtime module. This will build TVM against the TensorRT libraries.
+
+Example setting in config.cmake file:
+
+.. code:: cmake
+
+    set(USE_TENSORRT ON)
+    set(USE_TENSORRT_GRAPH_RUNTIME /home/ubuntu/TensorRT-7.0.0.11)
+
+
+Usage

Review comment:
       Explicitly say what you are doing in this section, such as "Build and 
Deploy ResNet-18 with TensorRT"

##########
File path: python/tvm/relay/op/contrib/tensorrt.py
##########
@@ -0,0 +1,751 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""TensorRT supported operators."""
+import logging
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay import transform
+from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr_functor import ExprMutator
+
+
+def is_tensorrt_runtime_enabled():
+    """Check if the TensorRT graph runtime is present.
+    Returns
+    -------
+    ret: bool
+        True if present, False if not.
+    """
+    check_enabled = 
tvm.get_global_func("relay.op.is_tensorrt_runtime_enabled", True)
+    if check_enabled:
+        return check_enabled()
+    return False
+
+
+def get_tensorrt_version():
+    """Gets the version of TensorRT that TVM is built against or is targeting.
+
+    Returns
+    -------
+    ret: Tuple[int]
+        TensorRT version as a tuple of major, minor, and patch number. If TVM
+        is not built with TensorRT, the value set by set_tensorrt_version() is 
returned instead.
+    """
+    pass_ctx = tvm.transform.PassContext.current()
+    if "relay.ext.tensorrt.options" in pass_ctx.config:
+        return 
tuple(pass_ctx.config["relay.ext.tensorrt.options"].tensorrt_version)
+    return tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+
+
+def get_tensorrt_use_implicit_batch_mode():
+    pass_ctx = tvm.transform.PassContext.current()
+    return pass_ctx.config["relay.ext.tensorrt.options"].use_implicit_batch
+
+
+def get_tensorrt_remove_no_mac_subgraphs():
+    pass_ctx = tvm.transform.PassContext.current()
+    return 
pass_ctx.config["relay.ext.tensorrt.options"].remove_no_mac_subgraphs
+
+
+def partition_for_tensorrt(
+    mod,
+    params=None,
+    version=None,
+    use_implicit_batch=True,
+    remove_no_mac_subgraphs=False,
+    max_workspace_size=1 << 30,
+):
+    """Partition the graph greedily offloading supported
+    operators to TensorRT.
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+    version : Optional[Tuple(int)]
+        TensorRT version to target as tuple of (major, minor, patch). If TVM 
is compiled with
+        USE_TENSORRT_GRAPH_RUNTIME=ON, the linked TensorRT version will be 
used instead.
+    use_implicit_batch : Optional[bool]
+        Use TensorRT implicit batch mode (default true). Setting to false will 
enable explicit batch
+        mode which will widen supported operators to include those which 
modify the batch dimension,
+        but may reduce performance for some models.
+    remove_no_mac_subgraphs : Optional[bool]
+        Removes subgraphs which have been partitioned for TensorRT if they do 
not have any
+        multiply-accumulate operations. The removed subgraphs will go through 
TVM's standard
+        compilation instead. Can improve performance.
+    max_workspace_size : Optional[int]
+        How many bytes of workspace size to allow each subgraph to use for 
TensorRT engine creation.
+        See TensorRT documentation for more info.
+    Returns
+    -------
+    mod : annotated and partitioned module.
+    config : "relay.ext.tensorrt.options" configuration which should be given 
to PassContext when
+             building.
+    """
+    config = {
+        "use_implicit_batch": use_implicit_batch,
+        "max_workspace_size": max_workspace_size,
+        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
+    }
+    if version:
+        assert isinstance(version, tuple) and len(version) == 3
+        config["tensorrt_version"] = version
+    else:
+        linked_version = 
tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
+        if not linked_version:
+            logging.warning(
+                "TVM was not built against TensorRT and no version was 
provided to "
+                "partition_for_tensorrt. Defaulting to 6.0.1"
+            )
+            linked_version = (6, 0, 1)
+        config["tensorrt_version"] = linked_version
+
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            RemoveDropoutPass(),
+            transform.RemoveUnusedFunctions(),
+            transform.ConvertLayout(
+                {"nn.conv2d": ["NCHW", "default"], "nn.conv3d": ["NCDHW", 
"default"]}
+            ),
+            transform.FoldConstant(),
+            transform.AnnotateTarget("tensorrt"),
+            transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+            transform.InferType(),
+        ]
+    )
+    with tvm.transform.PassContext(opt_level=3, 
config={"relay.ext.tensorrt.options": config}):
+        mod = seq(mod)
+        mod = prune_tensorrt_subgraphs(mod)
+    return mod, config
+
+
+def _register_external_op_helper(op_name, supported=True):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return supported
+
+    return _func_wrapper
+
+
+def _register_external_op_helper_func(op_name, func):
+    @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+    def _func_wrapper(attrs, args):
+        if any([x.checked_type.dtype != "float32" for x in args]):
+            logging.info("Only float32 inputs are supported for TensorRT.")
+            return False
+        return func(attrs, args, op_name)
+
+    return _func_wrapper
+
+
+# Ops which are always supported
+_register_external_op_helper("nn.relu")
+_register_external_op_helper("sigmoid")
+_register_external_op_helper("tanh")
+_register_external_op_helper("subtract")
+_register_external_op_helper("multiply")
+_register_external_op_helper("divide")
+_register_external_op_helper("power")
+_register_external_op_helper("maximum")
+_register_external_op_helper("minimum")
+_register_external_op_helper("exp")
+_register_external_op_helper("log")
+_register_external_op_helper("sqrt")
+_register_external_op_helper("abs")
+_register_external_op_helper("negative")
+_register_external_op_helper("nn.batch_flatten")
+_register_external_op_helper("clip")
+
+
[email protected]_op_attr("add", "target.tensorrt")
+def add_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if add is supported by TensorRT."""
+
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if (
+        not get_tensorrt_use_implicit_batch_mode()
+        and (isinstance(args[0], Constant) or isinstance(args[1], Constant))
+        and args[0].checked_type.shape[0] == args[1].checked_type.shape[0]
+        and args[0].checked_type.shape[0] != 1
+        and (len(args[0].checked_type.shape) > 3 or 
len(args[1].checked_type.shape) > 3)
+    ):
+        logging.info("add: bug in TRT with adding batched constants.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.batch_norm", "target.tensorrt")
+def batch_norm_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.batch_norm is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if int(attrs.axis) not in (1, 3):
+        logging.info("nn.batch_norm: axis is %d but must be 1 or 3.", 
int(attrs.axis))
+        return False
+    return True
+
+
[email protected]_op_attr("nn.softmax", "target.tensorrt")
+def softmax_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.softmax is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axis) == 0:
+        logging.info("nn.softmax: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.conv2d", "target.tensorrt")
+def conv2d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.conv2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.data_layout != "NCHW":
+        logging.info("nn.conv2d: data_layout is %s but must be NCHW.", 
attrs.data_layout)
+        return False
+    if attrs.kernel_layout != "OIHW":
+        logging.info("nn.conv2d: kernel_layout is %s but must be OIHW.", 
attrs.kernel_layout)
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCHW":
+        logging.info("nn.conv2d: out_layout is %s but must be NCHW.", 
attrs.out_layout)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.dense", "target.tensorrt")
+def dense_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if dense is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    input_rank = len(args[0].checked_type.shape)
+    weight_rank = len(args[1].checked_type.shape)
+    if input_rank not in (2, 3, 4):
+        logging.info("nn.dense: input has rank %d but must be 2, 3 or 4.", 
input_rank)
+        return False
+    if weight_rank != 2:
+        logging.info("nn.dense: weight has rank %d but must be 2.", 
weight_rank)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.bias_add", "target.tensorrt")
+def bias_add_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.bias_add is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    input_rank = len(args[0].checked_type.shape)
+    if input_rank not in (2, 3, 4):
+        logging.info("nn.bias_add: input rank is %d but must be 2, 3 or 4.", 
input_rank)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.max_pool2d", "target.tensorrt")
+def max_pool_2d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.max_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.max_pool2d: layout is %s but must be NCHW.", 
attrs.layout)
+        return False
+    if attrs.ceil_mode and get_tensorrt_version() < (5, 1, 5):
+        logging.info("nn.avg_pool2d: ceil_mode=True requires TensorRT 5.1.5 or 
greater.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.avg_pool2d", "target.tensorrt")
+def avg_pool_2d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.avg_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.avg_pool2d: layout is %d but must be NCHW.", 
attrs.layout)
+        return False
+    if attrs.count_include_pad and len(attrs.padding) == 4:
+        logging.info(
+            "nn.avg_pool2d: inclusive-counted blended or average "
+            "pooling is not supported in combination with asymmetric padding"
+        )
+        return False
+    if attrs.ceil_mode and get_tensorrt_version() < (5, 1, 5):
+        logging.info("nn.avg_pool2d: ceil_mode=True requires TensorRT 5.1.5 or 
greater.")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.global_max_pool2d", "target.tensorrt")
+def global_max_pool_2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.global_max_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.global_max_pool2d: layout is %s but must be NCHW.", 
attrs.layout)
+        return False
+    return True
+
+
[email protected]_op_attr("nn.global_avg_pool2d", "target.tensorrt")
+def global_avg_pool_2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.global_avg_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.layout != "NCHW":
+        logging.info("nn.global_avg_pool2d: layout is %s but must be NCHW.", 
attrs.layout)
+        return False
+    return True
+
+
[email protected]_op_attr("expand_dims", "target.tensorrt")
+def expand_dims_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if expand_dims is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axis) == 0:
+        logging.info("expand_dims: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("squeeze", "target.tensorrt")
+def squeeze_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if squeeze is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not attrs.axis:
+        logging.info("squeeze: must explicitly set axis.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([axis == 0 for axis in 
map(int, attrs.axis)]):
+        logging.info("squeeze: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("concatenate", "target.tensorrt")
+def concatenate_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if concatenate is supported by TensorRT."""
+    if any([x.dtype != "float32" for x in args[0].checked_type.fields]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not get_tensorrt_use_implicit_batch_mode():
+        return True
+    if int(attrs.axis) == 0:
+        logging.info("concatenate: can't modify batch dimension.")
+        return False
+    if isinstance(args[0], Tuple):
+        for tuple_input in args[0].fields:
+            if isinstance(tuple_input, Constant):
+                logging.info("concatenate: can't concatenate tensors with 
constants.")
+                return False
+    return True
+
+
[email protected]_op_attr("nn.conv2d_transpose", "target.tensorrt")
+def conv2d_transpose_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.conv2d_transpose is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.data_layout != "NCHW":
+        logging.info("nn.conv2d_transpose: data_layout is %s but must be 
NCHW.", attrs.data_layout)
+        return False
+    if attrs.kernel_layout != "OIHW":
+        logging.info(
+            "nn.conv2d_transpose: kernel_layout is %s but must be OIHW.", 
attrs.kernel_layout
+        )
+        return False
+    if attrs.out_layout and attrs.out_layout != "NCHW":
+        logging.info("nn.conv2d_transpose: out_layout is %s but must be 
NCHW.", attrs.out_layout)
+        return False
+    if attrs.dilation and any([rate != 1 for rate in map(int, 
attrs.dilation)]):
+        logging.info("nn.conv2d_transpose: dilation rate must be 1.")
+        return False
+    return True
+
+
[email protected]_op_attr("transpose", "target.tensorrt")
+def transpose_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if transpose is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and int(attrs.axes[0]) != 0:
+        logging.info("transpose: can't modify batch dimension.")
+        return False
+    return True
+
+
[email protected]_op_attr("layout_transform", "target.tensorrt")
+def layout_transform_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if layout_transform is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if (attrs.src_layout, attrs.dst_layout) not in [
+        ("NCHW", "NHWC"),
+        ("NHWC", "NCHW"),
+        ("NDHWC", "NCDHW"),
+        ("NCDHW", "NDHWC"),
+    ]:
+        logging.info(
+            "layout_transform: %s to %s is not supported.", attrs.src_layout, 
attrs.dst_layout
+        )
+        return False
+    return True
+
+
[email protected]_op_attr("reshape", "target.tensorrt")
+def reshape_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if reshape is supported by TensorRT."""
+    if args[0].checked_type.dtype != "float32":
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if any([x < -1 for x in map(int, attrs.newshape)]):
+        logging.info("reshape: new shape dims must be explicit.")
+        return False
+    if get_tensorrt_use_implicit_batch_mode():
+        shape = list(map(int, args[0].checked_type.shape))
+        new_shape = list(map(int, attrs.newshape))
+        if len(new_shape) == 0 or len(shape) == 0:
+            logging.info("reshape: Can't reshape to or from scalar.")
+            return False
+        # TRT cannot modify batch dimension.
+        original_volume = np.prod(shape)
+        # First, resolve 0.
+        for i, value in enumerate(new_shape):
+            if value == 0:
+                new_shape[i] = shape[i]
+        # Resolve -1.
+        for i, value in enumerate(new_shape):
+            if value == -1:
+                new_shape[i] = original_volume // np.prod([x for x in 
new_shape if x != -1])
+        if shape[0] != new_shape[0]:
+            logging.info("reshape: can't modify batch dimension.")
+            return False
+    return True
+
+
[email protected]_op_attr("nn.pad", "target.tensorrt")
+def pad_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if nn.pad is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if attrs.pad_mode != "constant":
+        logging.info("nn.pad: pad mode is %s but must be constant.", 
attrs.pad_mode)
+        return False
+    if float(attrs.pad_value) != 0.0:
+        logging.info("nn.pad: pad value is %f but must be 0.0.", 
float(attrs.pad_value))
+        return False
+    if any([x != 0 for x in attrs.pad_width[0]]) or any([x != 0 for x in 
attrs.pad_width[1]]):
+        logging.info("nn.pad: can't pad batch or channel dimensions.")
+        return False
+    if len(attrs.pad_width) == 5 and any([x != 0 for x in attrs.pad_width[2]]):
+        logging.info("nn.pad: can only pad last two dimensions for 5D inputs.")
+    return True
+
+
+def reduce_annotate_fn(attrs, args, op_name):
+    """Helper for reduce operations."""
+    if not attrs.axis or len(attrs.axis) == 0:
+        logging.info("%s: cannot reduce to scalar.", op_name)
+        return False
+    if attrs.exclude:
+        logging.info("%s: exclude not supported.", op_name)
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in 
map(int, attrs.axis)]):
+        logging.info("%s: can't modify batch dimension.", op_name)
+        return False
+    return True
+
+
+_register_external_op_helper_func("sum", reduce_annotate_fn)
+_register_external_op_helper_func("prod", reduce_annotate_fn)
+_register_external_op_helper_func("max", reduce_annotate_fn)
+_register_external_op_helper_func("min", reduce_annotate_fn)
+_register_external_op_helper_func("mean", reduce_annotate_fn)
+
+
+def trt_5_1_5_annotate_fn(attrs, args, op_name):
+    """Helper for ops which require TRT 5.1.5 or greater."""
+    if get_tensorrt_version() < (5, 1, 5):
+        logging.info("%s: requires TensorRT version 5.1.5 or higher.", op_name)
+        return False
+    return True
+
+
+_register_external_op_helper_func("nn.leaky_relu", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("sin", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("cos", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("atan", trt_5_1_5_annotate_fn)
+_register_external_op_helper_func("ceil", trt_5_1_5_annotate_fn)
+
+
[email protected]_op_attr("strided_slice", "target.tensorrt")
+def strided_slice_annotate_fn(attrs, args):  # pylint: disable=unused-variable
+    """Check if strided_slice is supported by TensorRT."""
+    if args[0].checked_type.dtype != "float32":
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if not trt_5_1_5_annotate_fn(attrs, args, "strided_slice"):
+        return False
+    if get_tensorrt_use_implicit_batch_mode():
+        batch_dim_begin_modified = attrs.begin[0] is not None and 
int(attrs.begin[0]) != 0
+        batch_dim_end_modified = (
+            attrs.end[0] is not None
+            and int(attrs.end[0]) != -1
+            and int(attrs.end[0]) != int(args[0].checked_type.shape[0])
+        )
+        if batch_dim_begin_modified or batch_dim_end_modified:
+            logging.info("strided_slice: can't modify batch dimension.")
+            return False
+    if any([x is not None and x <= 0 for x in attrs.strides]):
+        logging.info("strided_slice: stride must be positive")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.adaptive_max_pool2d", "target.tensorrt")
+def adapative_max_pool2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.adaptive_max_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if len(attrs.output_size) == 0 or any([size != 1 for size in map(int, 
attrs.output_size)]):
+        logging.info("nn.adaptive_max_pool2d: output size must be (1, 1).")
+        return False
+    return True
+
+
[email protected]_op_attr("nn.adaptive_avg_pool2d", "target.tensorrt")
+def adapative_avg_pool2d_annotate_fn(attrs, args):  # pylint: 
disable=unused-variable
+    """Check if nn.adaptive_avg_pool2d is supported by TensorRT."""
+    if any([x.checked_type.dtype != "float32" for x in args]):
+        logging.info("Only float32 inputs are supported for TensorRT.")
+        return False
+    if len(attrs.output_size) == 0 or any([size != 1 for size in map(int, 
attrs.output_size)]):
+        logging.info("nn.adaptive_avg_pool2d: output size must be (1, 1).")
+        return False
+    return True
+
+
@tvm.ir.register_op_attr("nn.conv3d", "target.tensorrt")
def conv3d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
    """Check if nn.conv3d is supported by TensorRT.

    Requires float32 inputs, TensorRT >= 6.0.1, and NCDHW/OIDHW layouts.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of the nn.conv3d call.
    args : list of relay.Expr
        Call arguments with ``checked_type`` available.

    Returns
    -------
    bool
        True if the op can be offloaded to TensorRT, False otherwise.
    """
    if any(x.checked_type.dtype != "float32" for x in args):
        logging.info("Only float32 inputs are supported for TensorRT.")
        return False
    # Gate on the runtime-detected TensorRT version.
    if get_tensorrt_version() < (6, 0, 1):
        logging.info("nn.conv3d: requires TensorRT version 6.0.1 or higher.")
        return False
    if attrs.data_layout != "NCDHW":
        logging.info("nn.conv3d: data_layout is %s but must be NCDHW.", attrs.data_layout)
        return False
    if attrs.kernel_layout != "OIDHW":
        logging.info("nn.conv3d: kernel_layout is %s but must be OIDHW.", attrs.kernel_layout)
        return False
    # out_layout may be empty (unset); only validate when it is specified.
    if attrs.out_layout and attrs.out_layout != "NCDHW":
        logging.info("nn.conv3d: out_layout is %s but must be NCDHW.", attrs.out_layout)
        return False
    return True
+
+
@tvm.ir.register_op_attr("nn.max_pool3d", "target.tensorrt")
def max_pool_3d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
    """Check if nn.max_pool3d is supported by TensorRT.

    Requires float32 inputs, TensorRT >= 6.0.1, and NCDHW layout.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of the nn.max_pool3d call.
    args : list of relay.Expr
        Call arguments with ``checked_type`` available.

    Returns
    -------
    bool
        True if the op can be offloaded to TensorRT, False otherwise.
    """
    if any(x.checked_type.dtype != "float32" for x in args):
        logging.info("Only float32 inputs are supported for TensorRT.")
        return False
    # Gate on the runtime-detected TensorRT version.
    if get_tensorrt_version() < (6, 0, 1):
        logging.info("nn.max_pool3d: requires TensorRT version 6.0.1 or higher.")
        return False
    if attrs.layout != "NCDHW":
        logging.info("nn.max_pool3d: layout is %s but must be NCDHW.", attrs.layout)
        return False
    return True
+
+
@tvm.ir.register_op_attr("nn.avg_pool3d", "target.tensorrt")
def avg_pool_3d_annotate_fn(attrs, args):  # pylint: disable=unused-variable
    """Check if nn.avg_pool3d is supported by TensorRT.

    Requires float32 inputs, TensorRT >= 6.0.1, and NCDHW layout.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of the nn.avg_pool3d call.
    args : list of relay.Expr
        Call arguments with ``checked_type`` available.

    Returns
    -------
    bool
        True if the op can be offloaded to TensorRT, False otherwise.
    """
    if any(x.checked_type.dtype != "float32" for x in args):
        logging.info("Only float32 inputs are supported for TensorRT.")
        return False
    # Gate on the runtime-detected TensorRT version.
    if get_tensorrt_version() < (6, 0, 1):
        logging.info("nn.avg_pool3d: requires TensorRT version 6.0.1 or higher.")
        return False
    if attrs.layout != "NCDHW":
        logging.info("nn.avg_pool3d: layout is %s but must be NCDHW.", attrs.layout)
        return False
    return True
+
+
@tvm.ir.register_op_attr("nn.conv3d_transpose", "target.tensorrt")
def conv3d_transpose_annotate_fn(attrs, args):  # pylint: disable=unused-variable
    """Check if nn.conv3d_transpose is supported by TensorRT.

    Requires float32 inputs, TensorRT >= 6.0.1, NCDHW/OIDHW layouts,
    dilation rate 1, and no output padding.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of the nn.conv3d_transpose call.
    args : list of relay.Expr
        Call arguments with ``checked_type`` available.

    Returns
    -------
    bool
        True if the op can be offloaded to TensorRT, False otherwise.
    """
    if any(x.checked_type.dtype != "float32" for x in args):
        logging.info("Only float32 inputs are supported for TensorRT.")
        return False
    # Gate on the runtime-detected TensorRT version.
    if get_tensorrt_version() < (6, 0, 1):
        logging.info("nn.conv3d_transpose: requires TensorRT version 6.0.1 or higher.")
        return False
    if attrs.data_layout != "NCDHW":
        logging.info("nn.conv3d_transpose: data_layout is %s but must be NCDHW.", attrs.data_layout)
        return False
    if attrs.kernel_layout != "OIDHW":
        logging.info(
            "nn.conv3d_transpose: kernel_layout is %s but must be OIDHW.", attrs.kernel_layout
        )
        return False
    # out_layout may be empty (unset); only validate when it is specified.
    if attrs.out_layout and attrs.out_layout != "NCDHW":
        logging.info("nn.conv3d_transpose: out_layout is %s but must be NCDHW.", attrs.out_layout)
        return False
    # Dilated transposed convolution is rejected.
    if attrs.dilation and any(rate != 1 for rate in map(int, attrs.dilation)):
        logging.info("nn.conv3d_transpose: dilation rate must be 1.")
        return False
    # Any non-zero output padding is rejected.
    if attrs.output_padding and any(x != 0 for x in map(int, attrs.output_padding)):
        logging.info("nn.conv3d_transpose: output padding is not supported.")
        return False
    return True
+
+
def is_valid_subgraph(params, body):
    """Final check on whether the subgraph is valid and should be offloaded to TensorRT.

    Parameters
    ----------
    params : list of relay.Var
        Input parameters of the candidate subgraph.
    body : relay.Expr
        Body expression of the candidate subgraph.

    Returns
    -------
    bool
        True if the subgraph should be offloaded to TensorRT.
    """
    # Remove invalid subgraphs for implicit batch mode: all inputs must be
    # non-scalar and share the same batch (first) dimension.
    if get_tensorrt_use_implicit_batch_mode():
        input_batch_sizes = []
        for var in params:
            if isinstance(var.checked_type, relay.TupleType):
                for tuple_type in var.checked_type.fields:
                    # Scalar inputs not allowed
                    if len(tuple_type.shape) == 0:
                        logging.info("tensorrt: scalar inputs not supported")
                        return False
                    input_batch_sizes.append(int(tuple_type.shape[0]))
            else:
                # Scalar inputs not allowed
                if len(var.checked_type.shape) == 0:
                    logging.info("tensorrt: scalar inputs not supported")
                    return False
                input_batch_sizes.append(int(var.checked_type.shape[0]))
        # More than one distinct batch size means the inputs disagree.
        if len(set(input_batch_sizes)) > 1:
            logging.info("tensorrt: inputs have different batch sizes")
            return False
    # Optionally reject subgraphs with no multiply-accumulates.
    if get_tensorrt_remove_no_mac_subgraphs() and relay.analysis.get_total_mac_number(body) == 0:
        logging.info("tensorrt: removing subgraph with no multiply-accumulates")
        return False
    return True
+
+
+def prune_tensorrt_subgraphs(mod, target="tensorrt"):

Review comment:
       As it is already prune_"tensorrt"_subgraphs, it looks to me that you 
don't need to specify `target` in the arguments?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to