trevor-m commented on a change in pull request #6395:
URL: https://github.com/apache/incubator-tvm/pull/6395#discussion_r492407348
##########
File path: CMakeLists.txt
##########
@@ -76,6 +76,8 @@ tvm_option(USE_COREML "Build with coreml support" OFF)
tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF)
tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF)
tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF)
+tvm_option(USE_TENSORRT "Build with TensorRT" OFF)
Review comment:
Thanks for the review, Cody!
You're right, the names aren't really that clear here. Originally, I had
them as `USE_TENSORRT_CODEGEN` for codegen only and `USE_TENSORRT` for both
codegen and runtime. I changed them to match the ACL definitions.
##########
File path: src/runtime/contrib/tensorrt/tensorrt_runtime.cc
##########
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+ * \brief JSON runtime implementation for TensorRT.
+ */
+
+#include <dmlc/parameter.h>
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/registry.h>
+
+#include <fstream>
+
+#include "../../file_util.h"
+#include "../json/json_node.h"
+#include "../json/json_runtime.h"
+
+#ifdef TVM_GRAPH_RUNTIME_TENSORRT
+#include "NvInfer.h"
+#include "tensorrt_builder.h"
+#endif
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using namespace tvm::runtime::json;
+
+class TensorRTRuntime : public JSONRuntimeBase {
+ public:
+ /*!
+ * \brief The TensorRT runtime module. Deserializes the provided functions
+ * on creation and stores them in the layer cache.
+ *
+ * \param symbol_name The name of the function.
+ * \param graph_json The serialized JSON representation of a sub-graph.
+ * \param const_names The names of each constant in the sub-graph.
+ */
+ explicit TensorRTRuntime(const std::string& symbol_name, const std::string& graph_json,
+ const Array<String>& const_names)
+ : JSONRuntimeBase(symbol_name, graph_json, const_names),
+ use_implicit_batch_(true),
+ max_workspace_size_(size_t(1) << 30) {}
+
+ /*!
+ * \brief The type key of the module.
+ *
+ * \return module type key.
+ */
+ const char* type_key() const override { return "tensorrt"; }
+
+ /*!
+ * \brief Initialize runtime. Create TensorRT layer from JSON
+ * representation.
+ *
+ * \param consts The constant params from compiled model.
+ */
+ void Init(const Array<NDArray>& consts) override {
+ CHECK_EQ(consts.size(), const_idx_.size())
+ << "The number of input constants must match the number of required.";
+ LoadGlobalAttributes();
+ if (GetCachedEnginesFromDisk()) return;
+ SetupConstants(consts);
+ BuildEngine();
+ CacheEngineToDisk();
+ }
+
+ void LoadGlobalAttributes() {
+ // These settings are global to the entire subgraph. Codegen will add them as
+ // attributes to all op nodes. Read them from the first one.
+ for (size_t i = 0; i < nodes_.size(); ++i) {
+ if (nodes_[i].HasAttr("use_implicit_batch") &&
nodes_[i].HasAttr("max_workspace_size")) {
+ use_implicit_batch_ =
+
std::stoi(nodes_[i].GetAttr<std::vector<std::string>>("use_implicit_batch")[0]);
+ // Allow max_workspace_size to be overridden at runtime.
+ size_t runtime_max_workspace_size =
+ dmlc::GetEnv("TVM_TENSORRT_MAX_WORKSPACE_SIZE", size_t(0));
+ if (runtime_max_workspace_size != 0) {
+ max_workspace_size_ = runtime_max_workspace_size;
+ } else {
+ max_workspace_size_ =
+ std::stoul(nodes_[i].GetAttr<std::vector<std::string>>("max_workspace_size")[0]);
+ }
+ return;
+ }
+ }
+ }
+
+#ifdef TVM_GRAPH_RUNTIME_TENSORRT
+ /*! \brief Run inference using built engine. */
+ void Run() override {
+ auto& engine_and_context = trt_engine_cache_.at(symbol_name_);
+ auto engine = engine_and_context.engine;
+ auto context = engine_and_context.context;
+ std::vector<void*> bindings(engine->getNbBindings(), nullptr);
+
+ for (size_t i = 0; i < input_nodes_.size(); ++i) {
+ auto nid = input_nodes_[i];
+ if (nodes_[nid].GetOpType() == "input") {
Review comment:
There can also be weights among the inputs, when `GetOpType() == "const"`.
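For readers following the thread, a minimal sketch of the idea (hedged: the binding-name scheme and the `EntryID`/`data_entry_` usage are assumptions based on the JSON runtime base class, not this PR's exact code):

```cpp
// Sketch only: input_nodes_ can contain both graph inputs ("input") and
// weights ("const"). Weights were already baked into the engine at build
// time, so only true inputs receive runtime bindings here.
for (size_t i = 0; i < input_nodes_.size(); ++i) {
  auto nid = input_nodes_[i];
  if (nodes_[nid].GetOpType() != "input") continue;  // skip "const" weights
  for (size_t j = 0; j < nodes_[nid].GetOpShape().size(); ++j) {
    uint32_t eid = EntryID(nid, j);
    // Hypothetical binding-name convention, for illustration only.
    const std::string name = nodes_[nid].GetOpName() + "_" + std::to_string(j);
    const int binding_index = engine->getBindingIndex(name.c_str());
    CHECK_NE(binding_index, -1) << "Engine binding not found for " << name;
    bindings[binding_index] = const_cast<void*>(data_entry_[eid]->data);
  }
}
```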
##########
File path: src/runtime/contrib/tensorrt/tensorrt_ops.h
##########
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file runtime/contrib/tensorrt/tensorrt_ops.h
+ * \brief Converters from Relay ops into TensorRT layers. Converters should
+ * inherit from TrtOpConverter and implement the Convert() method.
+ */
+
+#ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_
+#define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_OPS_H_
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "../json/json_node.h"
+#include "NvInfer.h"
+#include "tensorrt_utils.h"
+
+#if TRT_VERSION_GE(6, 0, 1)
+#define TRT_HAS_IMPLICIT_BATCH(params) (params->network->hasImplicitBatchDimension())
+#else
+#define TRT_HAS_IMPLICIT_BATCH(params) (true)
+#endif
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+using JSONGraphNode = tvm::runtime::json::JSONGraphNode;
+
+/*!
+ * \brief An input to an op may be either kTensor, in the case of an
+ * nvinfer1::ITensor*, or kWeight for nvinfer1::Weights.
+ */
+enum TrtInputType {
+ kTensor,
+ kWeight,
+};
+
+/*!
+ * \brief An input to a TrtOpConverter. The type of the input is either kTensor
+ * or kWeight. For kTensor, "tensor" contains the input tensor. For kWeight,
+ * "weight" contains the input weight and "weight_shape" contains the shape.
+ */
+struct TrtOpInput {
+ /*! \brief If type is kTensor, will store input tensor. */
+ nvinfer1::ITensor* tensor;
+
+ /*! \brief If type is kWeight, will store input weight. */
+ nvinfer1::Weights weight;
+
+ /*! \brief Whether the input is a tensor or a weight. */
+ TrtInputType type;
+
+ /*! \brief If type is kWeight, will store weight shape. */
+ std::vector<int> weight_shape;
+
+ explicit TrtOpInput(nvinfer1::ITensor* tensor)
+ : tensor(tensor), weight({nvinfer1::DataType::kFLOAT, nullptr, 0}), type(kTensor) {}
+ TrtOpInput(nvinfer1::Weights weight, const std::vector<int>& shape)
+ : tensor(nullptr), weight(weight), type(kWeight), weight_shape(shape) {}
+};
+
+/*! \brief Parameters used to convert an op from Relay to TensorRT. */
+struct AddTrtLayerParams {
Review comment:
Renamed to `TensorRTOpConverterParams`
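For context, the header above states that converters inherit from `TrtOpConverter` and implement `Convert()`, and the `TRT_HAS_IMPLICIT_BATCH` macro confirms the params struct carries a `network` pointer. A hedged sketch of a converter written against the renamed struct (the `inputs`/`outputs` members and the exact `Convert()` signature are assumptions for illustration):

```cpp
// Hypothetical ReLU converter. Only params->network is confirmed by this
// header; the other members and the Convert() signature are assumed.
class ReluOpConverter : public TrtOpConverter {
 public:
  void Convert(TensorRTOpConverterParams* params) const {
    // ReLU takes a single tensor input (a TrtOpInput with type == kTensor).
    nvinfer1::ITensor* input = params->inputs.at(0).tensor;
    nvinfer1::IActivationLayer* layer =
        params->network->addActivation(*input, nvinfer1::ActivationType::kRELU);
    params->outputs.push_back(layer->getOutput(0));
  }
};
```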
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]