zheng-da commented on a change in pull request #11325: [MXNET-703] TensorRT
runtime integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r205225164
##########
File path: src/executor/graph_executor.cc
##########
@@ -941,6 +969,114 @@ void GraphExecutor::FinishInitGraph(nnvm::Symbol symbol,
this->InitOpSegs();
}
+/*!
+ * \brief Re-initialize the graph after a TensorRT subgraph replacement pass.
+ *
+ * The pass absorbs some variables (weights and biases) into the TRT engine, so
+ * the arguments originally handed to GraphExecutor::Init(...) have to be reset:
+ * parameters whose name no longer matches any node reachable from the graph
+ * outputs are dropped, the context/grad-req vectors are resized to the new
+ * input counts, and context assignment plus shape, dtype and storage-type
+ * inference are rerun on the new topology.
+ *
+ * \param g graph produced by the replacement pass
+ * \param default_ctx default context forwarded to AssignContext
+ * \param ctx_map context map forwarded to AssignContext
+ * \param in_arg_ctxes resized to (#inputs - #mutable inputs)
+ * \param arg_grad_ctxes resized to (#inputs - #mutable inputs)
+ * \param aux_state_ctxes resized to #mutable inputs
+ * \param grad_req_types resized to (#inputs - #mutable inputs)
+ * \param arg_shape_map name -> shape; entries of removed params are erased
+ * \param arg_dtype_map name -> dtype; entries of removed params are erased
+ * \param arg_stype_map name -> storage type; entries of removed params are erased
+ * \param params_map name -> NDArray; params not present in the graph are erased
+ * \return the graph with context, shape, dtype and storage-type attributes set
+ */
+Graph GraphExecutor::ReinitGraph(Graph&& g, const Context &default_ctx,
+                                 const std::map<std::string, Context> &ctx_map,
+                                 std::vector<Context> *in_arg_ctxes,
+                                 std::vector<Context> *arg_grad_ctxes,
+                                 std::vector<Context> *aux_state_ctxes,
+                                 std::vector<OpReqType> *grad_req_types,
+                                 std::unordered_map<std::string, TShape> *arg_shape_map,
+                                 std::unordered_map<std::string, int> *arg_dtype_map,
+                                 std::unordered_map<std::string, int> *arg_stype_map,
+                                 std::unordered_map<std::string, NDArray> *params_map) {
+  // Start from every known param name and cross off the ones that still appear
+  // as a node in the graph; what remains is no longer referenced.
+  std::unordered_set<std::string> to_remove_params;
+  for (const auto& el : *params_map) {
+    to_remove_params.insert(el.first);
+  }
+
+  // Take the node pointer by const reference to avoid a ref-count bump per node.
+  DFSVisit(g.outputs, [&to_remove_params](const nnvm::NodePtr& n) {
+    to_remove_params.erase(n->attrs.name);
+  });
+
+  for (const auto& el : to_remove_params) {
+    params_map->erase(el);
+    arg_shape_map->erase(el);
+    arg_dtype_map->erase(el);
+    arg_stype_map->erase(el);
+  }
+
+  {
+    // Scoped on purpose: this reference must not be used once g is reassigned
+    // below, because the indexed graph it refers to is owned by g.
+    const auto &pre_idx = g.indexed_graph();
+    num_forward_inputs_ = pre_idx.input_nodes().size();
+    const size_t num_aux_states = pre_idx.mutable_input_nodes().size();
+    const size_t num_args = num_forward_inputs_ - num_aux_states;
+    in_arg_ctxes->resize(num_args);
+    arg_grad_ctxes->resize(num_args);
+    grad_req_types->resize(num_args);
+    aux_state_ctxes->resize(num_aux_states);
+  }
+
+  // create "device" and "context" attrs for the graph
+  g = AssignContext(g, default_ctx, ctx_map, *in_arg_ctxes, *arg_grad_ctxes,
+                    *aux_state_ctxes, *grad_req_types, num_forward_inputs_,
+                    num_forward_outputs_);
+
+  // Re-fetch the indexed graph: g has just been reassigned, so a reference
+  // obtained before AssignContext may be stale or dangling.
+  const auto &idx = g.indexed_graph();
+
+  // get number of nodes used in forward pass
+  num_forward_nodes_ = 0;
+  for (size_t i = 0; i < num_forward_outputs_; ++i) {
+    num_forward_nodes_ = std::max(
+        num_forward_nodes_, static_cast<size_t>(idx.outputs()[i].node_id + 1));
+  }
+
+  // Seed the inference passes with whatever the caller knows about each input;
+  // inputs without an entry keep the "unknown" default.
+  nnvm::ShapeVector arg_shapes(idx.input_nodes().size(), TShape());
+  nnvm::DTypeVector arg_dtypes(idx.input_nodes().size(), -1);
+  StorageTypeVector arg_stypes(idx.input_nodes().size(), kUndefinedStorage);
+  for (size_t i = 0; i < num_forward_inputs_; ++i) {
+    const uint32_t nid = idx.input_nodes().at(i);
+    const std::string &name = idx[nid].source->attrs.name;
+    auto it1 = arg_shape_map->find(name);
+    if (arg_shape_map->end() != it1) {
+      arg_shapes[i] = it1->second;
+    }
+    auto it2 = arg_dtype_map->find(name);
+    if (arg_dtype_map->end() != it2) {
+      arg_dtypes[i] = it2->second;
+    }
+    auto it3 = arg_stype_map->find(name);
+    if (arg_stype_map->end() != it3) {
+      arg_stypes[i] = it3->second;
+    }
+  }
+
+  // From here on g is reassigned by every pass, so the error handlers query
+  // g.indexed_graph() directly instead of reusing idx.
+  g = InferShape(std::move(g), std::move(arg_shapes), "__shape__");
+  if (g.GetAttr<size_t>("shape_num_unknown_nodes") != 0U) {
+    HandleInferShapeError(num_forward_inputs_, g.indexed_graph(),
+                          g.GetAttr<nnvm::ShapeVector>("shape"));
+  }
+
+  g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__");
+  if (g.GetAttr<size_t>("dtype_num_unknown_nodes") != 0U) {
+    HandleInferTypeError(num_forward_inputs_, g.indexed_graph(),
+                         g.GetAttr<nnvm::DTypeVector>("dtype"));
+  }
+
+  g = InferStorageType(std::move(g), std::move(arg_stypes), "__storage_type__");
+
+  if (g.GetAttr<size_t>("storage_type_num_unknown_nodes") != 0U) {
+    HandleInferStorageTypeError(num_forward_inputs_, g.indexed_graph(),
+                                g.GetAttr<StorageTypeVector>("storage_type"));
+  }
+
+  return g;
+}
+
+/*!
+ * \brief Return the "optimized" symbol contained in graph_.
+ * Intended for optimization passes such as the TensorRT pass.
+ */
+nnvm::Symbol GraphExecutor::GetOptimizedSymbol() {
+ Symbol ret;
+ ret.outputs = std::vector<nnvm::NodeEntry>(graph_.outputs.begin(),
+ graph_.outputs.begin() + num_forward_outputs_);
+ ret = ret.Copy();
+ static const Op* trt_op = Op::Get("_trt_op");
+ DFSVisit(ret.outputs, [](const nnvm::NodePtr n) {
+ if (n->op() == trt_op) {
+ n->attrs.dict.clear();
+ }
Review comment:
how does this optimize a graph?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services