[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-07-21 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r204224436
 
 

 ##
 File path: src/operator/contrib/nnvm_to_onnx.cc
 ##
 @@ -0,0 +1,545 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file trt.cc
+ * \brief TensorRT operation registration
+ * \author Marek Kolodziej, Clement Fuji Tsang
+*/
+
+#if MXNET_USE_TENSORRT
+
+#include "./nnvm_to_onnx-inl.h"
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "./tensorrt-inl.h"
+#include "../../common/serialization.h"
+#include "../../common/utils.h"
+#include "../../ndarray/ndarray_function.h"
+#include "../../operator/nn/activation-inl.h"
+#include "../../operator/nn/batch_norm-inl.h"
+#include "../../operator/nn/convolution-inl.h"
+#include "../../operator/nn/fully_connected-inl.h"
+#include "../../operator/nn/pooling-inl.h"
+#include "../../operator/softmax_output-inl.h"
+
+// #include 
+
+namespace mxnet {
+namespace op {
+namespace nnvm_to_onnx {
+
+op::TRTParam ConvertNnvmGraphToOnnx(
+    const nnvm::Graph& g,
+    std::unordered_map<std::string, NDArray>* const shared_buffer) {
+  op::TRTParam trt_param;
+  op::tensorrt::NameToIdx_t trt_input_map;
+  op::tensorrt::InferenceMap_t trt_output_map;
+
+  const nnvm::IndexedGraph& ig = g.indexed_graph();
+  const StorageTypeVector& storage_types =
+      g.GetAttr<StorageTypeVector>("storage_type");
+  const DTypeVector& dtypes = g.GetAttr<DTypeVector>("dtype");
+  const ShapeVector& shape_inputs = g.GetAttr<ShapeVector>("shape_inputs");
+
+  for (auto& e : storage_types) {
+    if (e != mshadow::kFloat32) {
+      LOG(FATAL) << "ONNX converter does not support types other than float32 "
+                    "right now.";
+    }
+  }
+
+  ModelProto model_proto;
+  // Need to determine IR versions and features to support
+  model_proto.set_ir_version(static_cast<int64>(2));
+  GraphProto* graph_proto = model_proto.mutable_graph();
+  //  graph_proto->set_name(graph_name);
+
+  std::unordered_map<std::string, TShape> placeholder_shapes =
+      GetPlaceholderShapes(shape_inputs, ig);
+  std::unordered_map<std::string, uint32_t> output_lookup = GetOutputLookup(ig);
+  uint32_t current_input = 0;
+
+  // can't do a foreach over IndexedGraph since it doesn't
+  // implement begin(), etc.
+  for (uint32_t node_idx = 0; node_idx < ig.num_nodes(); ++node_idx) {
+    const IndexedGraph::Node& node = ig[node_idx];
+    const nnvm::Node* source = node.source;
+    const NodeAttrs& attrs = source->attrs;
+    const Op* op = source->op();
+
+    std::string node_name = attrs.name;
+    // Here, "variable" actually means anything that's not an op,
+    // i.e. a constant (weights) or a placeholder.
+    if (source->is_variable()) {
+      // Is this a placeholder?
+      if (shared_buffer->count(node_name) == 0) {
+        // This fixes the problem with a SoftmaxOutput node during inference,
+        // but it's hacky.
+        // Need to figure out how to properly fix it.
+        if (node_name.find("label") != std::string::npos) {
+          current_input++;
+          continue;
+        }
+        trt_input_map.emplace(node_name, current_input++);
+        ConvertPlaceholder(node_name, placeholder_shapes, graph_proto);
+      } else {
+        // If it's not a placeholder, then by exclusion it's a constant.
+        ConvertConstant(graph_proto, node_name, shared_buffer);
+      }  // is_placeholder
+    } else {
+      // It's an op, rather than a "variable" (constant or placeholder).
+      NodeProto* node_proto = graph_proto->add_node();
+      node_proto->set_name(node_name);
+      if (converter_map.count(op->name) == 0) {
+        LOG(FATAL) << "Conversion for node of type " << op->name << " (node "
+                   << node_name << ") "
+                   << " is not supported yet.";
+      }
+      // Find function ptr to a converter based on the op name, and
+      // invoke the converter. This looks unsafe because find may not
+      // succeed, but it does because we're in the operator logic after
+      // testing that this node name does not represent a variable.
+      converter_map.find(op->name)->second(node_proto, attrs,

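The conversion loop quoted above reduces to a name-keyed dispatch table: variables become placeholders or constants, and every op is routed through converter_map. A minimal self-contained sketch of that dispatch pattern, with illustrative stand-in types rather than MXNet's real ones:

#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins for the protobuf types used by the real pass.
struct NodeProto { std::string op_type; };
struct NodeAttrs { std::string name; };

using Converter = std::function<void(NodeProto*, const NodeAttrs&)>;

// Name -> converter registry, analogous to converter_map in the quoted code.
static const std::unordered_map<std::string, Converter> converters = {
    {"Convolution", [](NodeProto* p, const NodeAttrs&) { p->op_type = "Conv"; }},
    {"Activation",  [](NodeProto* p, const NodeAttrs&) { p->op_type = "Relu"; }},
};

void ConvertNode(const std::string& op_name, NodeProto* proto, const NodeAttrs& attrs) {
  auto it = converters.find(op_name);
  if (it == converters.end()) {
    // Mirrors the LOG(FATAL) path: unsupported op types abort the conversion.
    throw std::runtime_error("Conversion for node of type " + op_name +
                             " is not supported yet.");
  }
  it->second(proto, attrs);  // Safe: presence was checked above.
}

int main() {
  NodeProto proto;
  ConvertNode("Convolution", &proto, NodeAttrs{"conv0"});
  std::cout << proto.op_type << "\n";  // prints "Conv"
}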
[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-07-16 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r202838789
 
 

 ##
 File path: src/executor/graph_executor.cc
 ##
 @@ -1054,6 +1059,10 @@ Graph GraphExecutor::ReinitGraph(Graph&& g, const Context _ctx,
   return g;
 }
 
+/*!
+ * \brief Return the "optimzed" symbol contained in _graph.
 
 Review comment:
   @KellenSunderland Fixed, good catch!




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-07-16 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r202818415
 
 

 ##
 File path: src/executor/exec_pass.h
 ##
 @@ -198,6 +198,27 @@ Graph InferStorageType(Graph&& graph,
                        StorageTypeVector&& storage_type_inputs = StorageTypeVector(),
                        const std::string& storage_type_attr_key = "");
 
+/*! \brief The default storage type inference function, which assigns all undefined
+ *        storage types to kDefaultStorage. If all of input and output storage types
+ *        are kDefaultStorage, DispatchMode::kFCompute is assigned to dispatch_mode.
+ *        Otherwise, DispatchMode::kFComputeFallback is assigned to dispatch_mode.
+ */
+bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
+                        const int dev_mask,
+                        DispatchMode* dispatch_mode,
+                        std::vector<int>* iattr,
+                        std::vector<int>* oattr);
+
+/*!
+ * \brief Replace subgraphs by TRT (forward only)
+ */
+Graph ReplaceSubgraph(Graph&& g,
+                      const std::unordered_set<nnvm::Node*>& set_subgraph,
+                      std::unordered_map<std::string, NDArray>* const params_map);
+
+std::vector<std::unordered_set<nnvm::Node*>> GetTrtCompatibleSubsets(const Graph& g,
 
 Review comment:
   @KellenSunderland Would you still like that ifdef added at this point? Just 
checking if there is any action necessary before the PR is ready for merging.
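
The contract in the DefaultStorageType doc comment above is compact enough to sketch directly. A minimal version, with illustrative enum names standing in for MXNet's actual definitions:

#include <algorithm>
#include <vector>

enum StorageType { kUndefinedStorage = -1, kDefaultStorage = 0 };
enum class DispatchMode { kFCompute, kFComputeFallback };

// Assign every undefined storage type to kDefaultStorage, then pick the
// dispatch mode: the fast path only if every input and output is default.
bool DefaultStorageTypeSketch(std::vector<int>* iattr, std::vector<int>* oattr,
                              DispatchMode* dispatch_mode) {
  for (auto* attrs : {iattr, oattr}) {
    for (int& st : *attrs) {
      if (st == kUndefinedStorage) st = kDefaultStorage;
    }
  }
  auto is_default = [](int st) { return st == kDefaultStorage; };
  const bool all_default =
      std::all_of(iattr->begin(), iattr->end(), is_default) &&
      std::all_of(oattr->begin(), oattr->end(), is_default);
  *dispatch_mode = all_default ? DispatchMode::kFCompute
                               : DispatchMode::kFComputeFallback;
  return true;
}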




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-07-11 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r201778476
 
 

 ##
 File path: src/operator/contrib/tensorrt.cc
 ##
 @@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file trt.cc
+ * \brief TensorRT operation registration
+ * \author Marek Kolodziej, Clement Fuji Tsang
+*/
+
+#if MXNET_USE_TENSORRT
+
+#include "./tensorrt-inl.h"
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../../common/serialization.h"
+#include "../../common/utils.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(TRTParam);
+
+OpStatePtr GetPtrMapping(nvinfer1::ICudaEngine* trt_engine,
+                         tensorrt::NameToIdx_t input_map,
+                         tensorrt::NameToIdx_t output_map) {
+  TRTEngineParam param;
+  for (int b = 0; b < trt_engine->getNbBindings(); ++b) {
+    const std::string& binding_name = trt_engine->getBindingName(b);
+    if (trt_engine->bindingIsInput(b)) {
+      param.binding_map.emplace_back(input_map[binding_name],
+                                     tensorrt::TypeIO::Inputs);
+    } else {
+      param.binding_map.emplace_back(output_map[binding_name],
+                                     tensorrt::TypeIO::Outputs);
+    }
+  }
+  param.trt_executor = trt_engine->createExecutionContext();
+  return OpStatePtr::Create<TRTEngineParam>(param);
+}
+
+OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx,
+                          const std::vector<TShape>& ishape,
+                          const std::vector<int>& itype) {
+  const TRTParam& node_param = nnvm::get<TRTParam>(attrs.parsed);
+
+  ::onnx::ModelProto model_proto;
+  bool success = model_proto.ParseFromString(node_param.serialized_onnx_graph);
+  if (!success) {
+    LOG(FATAL) << "Problems parsing serialized ONNX model.";
+  }
+  auto graph = model_proto.graph();
+  auto first_input_type = graph.input(0).type().tensor_type();
+  auto dim_value = first_input_type.shape().dim(0).dim_value();
+  uint64_t batch_size = static_cast<uint64_t>(dim_value);
+  // Need to set up max workspace size based on device properties
+  nvinfer1::ICudaEngine* const trt_engine = ::onnx_to_tensorrt::onnxToTrtCtx(
+      node_param.serialized_onnx_graph, batch_size, 1 << 30);
+
+  LOG(INFO) << "TensorRT engine instantiated!!!";
+
+  tensorrt::NameToIdx_t output_map;
+  for (auto& el : node_param.output_map) {
+    output_map[el.first] = std::get<0>(el.second);
+  }
+  return GetPtrMapping(trt_engine, node_param.input_map, output_map);
+}
+
+void TRTParamParser(nnvm::NodeAttrs* attrs) {
+  using namespace mshadow;
+
+  TRTParam param_;
+
+  try {
+    param_.Init(attrs->dict);
+    common::Deserialize(&param_.input_map, param_.serialized_input_map);
+    common::Deserialize(&param_.output_map, param_.serialized_output_map);
+    param_.onnx_pb_graph.ParseFromString(param_.serialized_onnx_graph);
+  } catch (const dmlc::ParamError& e) {
+    std::ostringstream os;
+    os << e.what();
+    os << ", in operator " << attrs->op->name << "("
+       << "name=\"" << attrs->name << "\"";
+    for (const auto& k : attrs->dict) {
+      os << ", " << k.first << "=\"" << k.second << "\"";
+    }
+    os << ")";
+    throw dmlc::ParamError(os.str());
+  }
+
+  attrs->parsed = std::move(param_);
+}
+
+template <>
+void TRTCompute<cpu>(const OpStatePtr& state, const OpContext& ctx,
+                     const std::vector<TBlob>& inputs,
+                     const std::vector<OpReqType>& req,
+                     const std::vector<TBlob>& outputs) {
+  LOG(FATAL) << "TRTCompute not implemented on the CPU";
+}
+
+inline bool TRTInferShape(const NodeAttrs& attrs, std::vector<TShape>* in_shape,
+                          std::vector<TShape>* out_shape) {
+  const auto node_param = nnvm::get<TRTParam>(attrs.parsed);
+  for (auto& el : node_param.output_map) {
+    (*out_shape)[std::get<0>(el.second)] = std::get<1>(el.second);
+  }
+  return true;
+}
+
+inline bool TRTInferStorageType(const NodeAttrs& attrs, const int dev_mask,
+                                DispatchMode* dispatch_mode,
+                                std::vector<int>*

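The GetPtrMapping function quoted above is the heart of the binding logic: TensorRT orders engine I/O by binding slot, so each slot must be mapped back to the position of that tensor on the framework side. A self-contained sketch of the same walk over TensorRT's pre-8.5 binding API (MapBindings and the two position maps are illustrative names, not MXNet code):

#include <NvInfer.h>

#include <map>
#include <string>
#include <utility>
#include <vector>

// For each binding slot, record (position on the framework side, is_input).
// trt_input_pos / trt_output_pos are assumed maps from tensor name to the
// position of that tensor in the caller's input/output lists.
std::vector<std::pair<int, bool>> MapBindings(
    nvinfer1::ICudaEngine* engine,
    const std::map<std::string, int>& trt_input_pos,
    const std::map<std::string, int>& trt_output_pos) {
  std::vector<std::pair<int, bool>> binding_map;
  for (int b = 0; b < engine->getNbBindings(); ++b) {
    const std::string name = engine->getBindingName(b);
    if (engine->bindingIsInput(b)) {
      binding_map.emplace_back(trt_input_pos.at(name), true);
    } else {
      binding_map.emplace_back(trt_output_pos.at(name), false);
    }
  }
  return binding_map;
}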
[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-07-11 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r201774632
 
 

 ##
 File path: Makefile
 ##
 @@ -94,6 +94,14 @@ else
 endif
 CFLAGS += -I$(TPARTYDIR)/mshadow/ -I$(TPARTYDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(TPARTYDIR)/tvm/include -Iinclude $(MSHADOW_CFLAGS)
 LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
+
+
+ifeq ($(USE_TENSORRT), 1)
 
 Review comment:
   @KellenSunderland ping ^^




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-07-10 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r201476472
 
 

 ##
 File path: src/executor/tensorrt_pass.cc
 ##
 @@ -0,0 +1,583 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt_pass.cc
+ * \brief Replace TRT compatible subgraphs by TRT engines
+ * \author Clement Fuji Tsang
+ */
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "./onnx_to_tensorrt.h"
+#include "./exec_pass.h"
+#include "../operator/contrib/nnvm_to_onnx-inl.h"
+
+namespace mxnet {
+namespace exec {
+
+using NodePtr = nnvm::NodePtr;
+
+/*!
+ * \brief Custom graph class, which will contain bi-directional nodes
+ * we need to compute DFS and reverse DFS for graph partitioning
+ */
+class BidirectionalGraph {
+ public:
+  struct Node {
+    nnvm::Node* nnvmptr;
+    std::vector<Node*> inputs;
+    std::vector<Node*> outputs;
+  };
+  std::vector<Node> nodes;
+  std::unordered_map<nnvm::Node*, uint32_t> nnvm2nid;
+  std::vector<Node*> outputs;
+  static const std::unordered_set<std::string> unconditionalTRTop;
+
+  explicit BidirectionalGraph(const Graph& g) {
+    auto& idx = g.indexed_graph();
+    auto num_nodes = idx.num_nodes();
+    nodes.reserve(num_nodes);
+    nnvm2nid.reserve(num_nodes);
 
 Review comment:
   @reminisce The order is the same as in the IndexedGraph, but we need to know 
both the children and the parents, instead of just the parents as in the 
IndexedGraph.
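
To make the point concrete: IndexedGraph stores only each node's inputs, so the wrapper materializes the reverse edges once up front. A minimal sketch of that reversal, independent of NNVM:

#include <cstdint>
#include <vector>

// parents[nid] lists the input (parent) node ids of node nid.
struct BiGraph {
  std::vector<std::vector<uint32_t>> parents;
  std::vector<std::vector<uint32_t>> children;
};

BiGraph MakeBidirectional(const std::vector<std::vector<uint32_t>>& inputs) {
  BiGraph g;
  g.parents = inputs;
  g.children.resize(inputs.size());
  for (uint32_t nid = 0; nid < inputs.size(); ++nid) {
    for (uint32_t parent : inputs[nid]) {
      g.children[parent].push_back(nid);  // reverse edge: parent -> child
    }
  }
  return g;
}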




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-30 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199333477
 
 

 ##
 File path: example/image-classification/tensorrt/test_tensorrt_resnet50.py
 ##
 @@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+
+import os.path
+import subprocess
 
 Review comment:
   Fixed.




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-30 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199333432
 
 

 ##
 File path: example/image-classification/tensorrt/test_tensorrt_resnet50.py
 ##
 @@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+
+import os.path
+import subprocess
+import mxnet as mx
+import numpy as np
+from time import time
+import sys
+import urllib
 
 Review comment:
   It's not used anymore (it used to be). Good catch! It's fixed now.




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-30 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199333403
 
 

 ##
 File path: src/operator/contrib/tensorrt-inl.h
 ##
 @@ -0,0 +1,140 @@
+#ifndef MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_
+#define MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt-inl.h
+ * \brief TensorRT Operator
+ * \author Marek Kolodziej, Clement Fuji Tsang
+*/
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../operator_common.h"
+#include "../../common/utils.h"
+#include "../../common/serialization.h"
+#include "../../executor/exec_pass.h"
+#include "../../executor/graph_executor.h"
+#include "../../executor/onnx_to_tensorrt.h"
+
+namespace mxnet {
+namespace op {
+
+using namespace nnvm;
+using namespace ::onnx;
+using int64 = ::google::protobuf::int64;
+
+namespace tensorrt {
+  enum class TypeIO { Inputs = 0, Outputs = 1 };
+  using NameToIdx_t = std::map<std::string, int32_t>;
+  using InferenceTuple_t = std::tuple<uint32_t, TShape, int, int>;
+  using InferenceMap_t = std::map<std::string, InferenceTuple_t>;
+}  // namespace tensorrt
+
+using trt_name_to_idx = std::map<std::string, uint32_t>;
+
+struct TRTParam : public dmlc::Parameter<TRTParam> {
+  std::string serialized_onnx_graph;
+  std::string serialized_input_map;
+  std::string serialized_output_map;
+  tensorrt::NameToIdx_t input_map;
+  tensorrt::InferenceMap_t output_map;
+  ::onnx::ModelProto onnx_pb_graph;
+
+  TRTParam() {}
+
+  TRTParam(const ::onnx::ModelProto& onnx_graph,
+           const tensorrt::InferenceMap_t& input_map,
+           const tensorrt::NameToIdx_t& output_map) {
+    common::Serialize(input_map, &serialized_input_map);
+    common::Serialize(output_map, &serialized_output_map);
+    onnx_graph.SerializeToString(&serialized_onnx_graph);
+  }
+
+  DMLC_DECLARE_PARAMETER(TRTParam) {
+    DMLC_DECLARE_FIELD(serialized_onnx_graph)
+    .describe("Serialized ONNX graph");
+    DMLC_DECLARE_FIELD(serialized_input_map)
+    .describe("Map from inputs to topological order as input.");
+    DMLC_DECLARE_FIELD(serialized_output_map)
+    .describe("Map from outputs to order in g.outputs.");
+  }
+};
+
+struct TRTEngineParam {
+  nvinfer1::IExecutionContext* trt_executor;
+  std::vector<std::pair<uint32_t, tensorrt::TypeIO> > binding_map;
+};
+
+OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx,
+                          const std::vector<TShape>& ishape,
+                          const std::vector<int>& itype);
+
+template<typename xpu>
+void TRTCompute(const OpStatePtr& state, const OpContext& ctx,
 
 Review comment:
   Done.




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-30 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r19994
 
 

 ##
 File path: src/executor/tensorrt_pass.cc
 ##
 @@ -0,0 +1,583 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt_pass.cc
+ * \brief Replace TRT compatible subgraphs by TRT engines
+ * \author Clement Fuji Tsang
+ */
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "./onnx_to_tensorrt.h"
+#include "./exec_pass.h"
+#include "../operator/contrib/nnvm_to_onnx-inl.h"
+
+namespace mxnet {
+namespace exec {
+
+using NodePtr = nnvm::NodePtr;
+
+/*!
+ * \brief Custom graph class, which will contain bi-directional nodes
+ * we need to compute DFS and reverse DFS for graph partitioning
+ */
+class BidirectionalGraph {
+ public:
+  struct Node {
+    nnvm::Node* nnvmptr;
+    std::vector<Node*> inputs;
+    std::vector<Node*> outputs;
+  };
+  std::vector<Node> nodes;
+  std::unordered_map<nnvm::Node*, uint32_t> nnvm2nid;
+  std::vector<Node*> outputs;
+  static const std::unordered_set<std::string> unconditionalTRTop;
+
+  explicit BidirectionalGraph(const Graph& g) {
+    auto& idx = g.indexed_graph();
+    auto num_nodes = idx.num_nodes();
+    nodes.reserve(num_nodes);
+    nnvm2nid.reserve(num_nodes);
+    outputs.reserve(idx.outputs().size());
+    DFSVisit(g.outputs, [this](const nnvm::NodePtr& n) {
+      BidirectionalGraph::Node new_node;
+      new_node.nnvmptr = n.get();
+      nnvm2nid[n.get()] = static_cast<uint32_t>(nodes.size());
+      nodes.emplace_back(std::move(new_node));
+    });
+    for (const auto& it : nnvm2nid) {
+      nnvm::Node* nnvmnode = it.first;
+      uint32_t nid = it.second;
+      for (auto& n : nnvmnode->inputs) {
+        uint32_t input_nid = nnvm2nid[n.node.get()];
+        nodes[input_nid].outputs.emplace_back(&nodes[nid]);
+        nodes[nid].inputs.emplace_back(&nodes[input_nid]);
+      }
+    }
+    for (auto& e : g.outputs) {
+      uint32_t nid = nnvm2nid[e.node.get()];
+      outputs.emplace_back(&nodes[nid]);
+    }
+  }
+
+  template <typename FVisit>
+  void DFS(const std::vector<Node*>& heads, bool reverse, FVisit fvisit) {
+    std::unordered_set<Node*> visited;
+    std::deque<Node*> stack(heads.begin(), heads.end());
+    visited.reserve(heads.size());
+    while (!stack.empty()) {
+      Node* vertex = stack.back();
+      stack.pop_back();
+      if (visited.count(vertex) == 0) {
+        visited.insert(vertex);
+        fvisit(vertex);
+        std::vector<Node*> nexts = reverse ? vertex->inputs : vertex->outputs;
+        for (Node* node : nexts) {
+          if (visited.count(node) == 0) {
+            stack.emplace_back(node);
+          }
+        }
+      }
+    }
+  }
+
+  using t_pairset = std::pair<std::unordered_set<Node*>, std::unordered_set<Node*>>;
+  using t_pairvec = std::pair<std::vector<Node*>, std::vector<Node*>>;
+  using t_uncomp_map = std::unordered_map<Node*, std::unordered_set<Node*>>;
+
+  std::unordered_set<Node*> naive_grow_subgraph(Node* head,
+                                                std::unordered_set<Node*>* set_unused,
+                                                t_uncomp_map* uncomp_map) {
+    std::unordered_set<Node*> subgraph;
+    std::unordered_set<Node*> uncomp_set;
+    std::deque<Node*> stack;
+    stack.emplace_back(head);
+    while (!stack.empty()) {
+      Node* vertex = stack.back();
+      stack.pop_back();
+      if (set_unused->count(vertex) && !uncomp_set.count(vertex)) {
+        set_unused->erase(vertex);
+        subgraph.insert(vertex);
+        uncomp_set.insert((*uncomp_map)[vertex].begin(), (*uncomp_map)[vertex].end());
+        for (Node* input : vertex->inputs) {
+          if (set_unused->count(input) && !uncomp_set.count(input)) {
+            stack.emplace_back(input);
+          }
+        }
+        for (Node* output : vertex->outputs) {
+          if (set_unused->count(output) && !uncomp_set.count(output)) {
+            stack.emplace_back(output);
+          }
+        }
+      }
+    }
+    return subgraph;
+  }
+
+  std::vector<std::unordered_set<Node*>> get_subsets(
+      std::unordered_map<std::string, NDArray>* const params_map) {
+    std::vector<std::unordered_set<Node*>> subgraphs;
+    std::unordered_set<Node*> set_nonTRTnodes;
+    std::unordered_set<Node*> set_allnodes(nodes.size());
+    std::vector<t_pairset> separation_sets;
+    for (Node& node : nodes)

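The naive_grow_subgraph routine above implements a greedy flood fill: start from a seed, absorb any still-unused neighbor in either direction, and track an incompatible set so the subgraph never swallows a node that would create a cycle through excluded nodes. A stripped-down sketch of the flood-fill core (the uncomp_set bookkeeping is omitted here for brevity):

#include <deque>
#include <unordered_set>
#include <vector>

struct GNode {
  std::vector<GNode*> inputs;
  std::vector<GNode*> outputs;
};

// Grow one candidate subgraph from a seed by absorbing unused neighbors.
std::unordered_set<GNode*> GrowSubgraph(GNode* seed,
                                        std::unordered_set<GNode*>* unused) {
  std::unordered_set<GNode*> subgraph;
  std::deque<GNode*> stack{seed};
  while (!stack.empty()) {
    GNode* v = stack.back();
    stack.pop_back();
    if (!unused->count(v)) continue;  // already taken elsewhere
    unused->erase(v);
    subgraph.insert(v);
    for (GNode* n : v->inputs)  if (unused->count(n)) stack.push_back(n);
    for (GNode* n : v->outputs) if (unused->count(n)) stack.push_back(n);
  }
  return subgraph;
}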
[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-30 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r19988
 
 

 ##
 File path: src/operator/contrib/tensorrt-inl.h
 ##
 @@ -0,0 +1,140 @@
+#ifndef MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_
+#define MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt-inl.h
+ * \brief TensorRT Operator
+ * \author Marek Kolodziej, Clement Fuji Tsang
+*/
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../operator_common.h"
+#include "../../common/utils.h"
+#include "../../common/serialization.h"
+#include "../../executor/exec_pass.h"
+#include "../../executor/graph_executor.h"
+#include "../../executor/onnx_to_tensorrt.h"
+
+namespace mxnet {
+namespace op {
+
+using namespace nnvm;
+using namespace ::onnx;
+using int64 = ::google::protobuf::int64;
+
+namespace tensorrt {
+  enum class TypeIO { Inputs = 0, Outputs = 1 };
+  using NameToIdx_t = std::map<std::string, int32_t>;
+  using InferenceTuple_t = std::tuple<uint32_t, TShape, int, int>;
+  using InferenceMap_t = std::map<std::string, InferenceTuple_t>;
+}  // namespace tensorrt
+
+using trt_name_to_idx = std::map<std::string, uint32_t>;
+
+struct TRTParam : public dmlc::Parameter<TRTParam> {
+  std::string serialized_onnx_graph;
+  std::string serialized_input_map;
+  std::string serialized_output_map;
+  tensorrt::NameToIdx_t input_map;
+  tensorrt::InferenceMap_t output_map;
+  ::onnx::ModelProto onnx_pb_graph;
+
+  TRTParam() {}
+
+  TRTParam(const ::onnx::ModelProto& onnx_graph,
+           const tensorrt::InferenceMap_t& input_map,
+           const tensorrt::NameToIdx_t& output_map) {
+    common::Serialize(input_map, &serialized_input_map);
+    common::Serialize(output_map, &serialized_output_map);
+    onnx_graph.SerializeToString(&serialized_onnx_graph);
+  }
+
+  DMLC_DECLARE_PARAMETER(TRTParam) {
+    DMLC_DECLARE_FIELD(serialized_onnx_graph)
+    .describe("Serialized ONNX graph");
+    DMLC_DECLARE_FIELD(serialized_input_map)
+    .describe("Map from inputs to topological order as input.");
+    DMLC_DECLARE_FIELD(serialized_output_map)
+    .describe("Map from outputs to order in g.outputs.");
+  }
+};
+
+struct TRTEngineParam {
+  nvinfer1::IExecutionContext* trt_executor;
+  std::vector<std::pair<uint32_t, tensorrt::TypeIO> > binding_map;
+};
+
+OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx,
+                          const std::vector<TShape>& ishape,
+                          const std::vector<int>& itype);
+
+template<typename xpu>
+void TRTCompute(const OpStatePtr& state, const OpContext& ctx,
+                const std::vector<TBlob>& inputs, const std::vector<OpReqType>& req,
+                const std::vector<TBlob>& outputs);
+
+inline bool TRTInferShape(const NodeAttrs& attrs,
 
 Review comment:
   Done.




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-30 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199331266
 
 

 ##
 File path: src/executor/graph_executor.cc
 ##
 @@ -940,6 +968,91 @@ void GraphExecutor::FinishInitGraph(nnvm::Symbol symbol,
   this->InitOpSegs();
 }
 
+
+Graph GraphExecutor::ReinitGraph(Graph&& g, const Context _ctx,
 
 Review comment:
   Done.




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-28 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199008230
 
 

 ##
 File path: src/executor/tensorrt_pass.cc
 ##
 @@ -0,0 +1,583 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt_pass.cc
+ * \brief Replace TRT compatible subgraphs by TRT engines
+ * \author Clement Fuji Tsang
+ */
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "./onnx_to_tensorrt.h"
+#include "./exec_pass.h"
+#include "../operator/contrib/nnvm_to_onnx-inl.h"
+
+namespace mxnet {
+namespace exec {
+
+using NodePtr = nnvm::NodePtr;
+
+/*!
+ * \brief Custom graph class, which will contain bi-directional nodes
+ * we need to compute DFS and reverse DFS for graph partitioning
+ */
+class BidirectionalGraph {
+ public:
+  struct Node {
+    nnvm::Node* nnvmptr;
+    std::vector<Node*> inputs;
+    std::vector<Node*> outputs;
+  };
+  std::vector<Node> nodes;
+  std::unordered_map<nnvm::Node*, uint32_t> nnvm2nid;
+  std::vector<Node*> outputs;
+  static const std::unordered_set<std::string> unconditionalTRTop;
+
+  explicit BidirectionalGraph(const Graph& g) {
+    auto& idx = g.indexed_graph();
+    auto num_nodes = idx.num_nodes();
+    nodes.reserve(num_nodes);
+    nnvm2nid.reserve(num_nodes);
+    outputs.reserve(idx.outputs().size());
+    DFSVisit(g.outputs, [this](const nnvm::NodePtr& n) {
+      BidirectionalGraph::Node new_node;
+      new_node.nnvmptr = n.get();
+      nnvm2nid[n.get()] = static_cast<uint32_t>(nodes.size());
+      nodes.emplace_back(std::move(new_node));
+    });
+    for (const auto& it : nnvm2nid) {
+      nnvm::Node* nnvmnode = it.first;
+      uint32_t nid = it.second;
+      for (auto& n : nnvmnode->inputs) {
+        uint32_t input_nid = nnvm2nid[n.node.get()];
+        nodes[input_nid].outputs.emplace_back(&nodes[nid]);
+        nodes[nid].inputs.emplace_back(&nodes[input_nid]);
+      }
+    }
+    for (auto& e : g.outputs) {
+      uint32_t nid = nnvm2nid[e.node.get()];
+      outputs.emplace_back(&nodes[nid]);
+    }
+  }
+
+  template <typename FVisit>
+  void DFS(const std::vector<Node*>& heads, bool reverse, FVisit fvisit) {
+    std::unordered_set<Node*> visited;
+    std::deque<Node*> stack(heads.begin(), heads.end());
 
 Review comment:
   Good catch, will change to std::stack in this case. There were other cases 
for std::deque in the code, but this wasn't one of them.
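
For reference, the agreed change amounts to this shape of iterative DFS, where std::stack is the better fit because only one end of the container is ever touched (a sketch, not the PR's final code):

#include <stack>
#include <unordered_set>
#include <vector>

struct SNode {
  std::vector<SNode*> inputs;
  std::vector<SNode*> outputs;
};

template <typename FVisit>
void Dfs(const std::vector<SNode*>& heads, bool reverse, FVisit visit) {
  std::unordered_set<SNode*> visited;
  std::stack<SNode*> stack;
  for (SNode* h : heads) stack.push(h);
  while (!stack.empty()) {
    SNode* v = stack.top();
    stack.pop();
    if (!visited.insert(v).second) continue;  // already visited
    visit(v);
    // Walk inputs for reverse DFS, outputs for forward DFS.
    for (SNode* n : (reverse ? v->inputs : v->outputs)) {
      if (!visited.count(n)) stack.push(n);
    }
  }
}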




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-28 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199006909
 
 

 ##
 File path: src/executor/graph_executor.cc
 ##
 @@ -940,6 +968,91 @@ void GraphExecutor::FinishInitGraph(nnvm::Symbol symbol,
   this->InitOpSegs();
 }
 
+
+Graph GraphExecutor::ReinitGraph(Graph&& g, const Context _ctx,
 
 Review comment:
   Will do.




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-28 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r199005691
 
 

 ##
 File path: src/executor/exec_pass.h
 ##
 @@ -198,6 +198,27 @@ Graph InferStorageType(Graph&& graph,
                        StorageTypeVector&& storage_type_inputs = StorageTypeVector(),
                        const std::string& storage_type_attr_key = "");
 
+/*! \brief The default storage type inference function, which assigns all undefined
+ *        storage types to kDefaultStorage. If all of input and output storage types
+ *        are kDefaultStorage, DispatchMode::kFCompute is assigned to dispatch_mode.
+ *        Otherwise, DispatchMode::kFComputeFallback is assigned to dispatch_mode.
+ */
+bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
+                        const int dev_mask,
+                        DispatchMode* dispatch_mode,
+                        std::vector<int>* iattr,
+                        std::vector<int>* oattr);
+
+/*!
+ * \brief Replace subgraphs by TRT (forward only)
+ */
+Graph ReplaceSubgraph(Graph&& g,
+                      std::unordered_set<nnvm::Node*> set_subgraph,
 
 Review comment:
   Changing to const&
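
For readers skimming the thread, the fix being agreed to here is the standard one for a read-only container parameter, e.g.:

#include <cstddef>
#include <unordered_set>

// By value: copies the whole set at every call site.
std::size_t CountByValue(std::unordered_set<int> s) { return s.size(); }

// By const reference: read-only access, no copy.
std::size_t CountByConstRef(const std::unordered_set<int>& s) { return s.size(); }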




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-26 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r198323659
 
 

 ##
 File path: example/image-classification/tensorrt/test_tensorrt_resnet50.py
 ##
 @@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+
+import os.path
+import subprocess
 
 Review comment:
  It isn't used anymore; it used to be. Good catch!




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-26 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r198308855
 
 

 ##
 File path: src/operator/contrib/tensorrt-inl.h
 ##
 @@ -0,0 +1,140 @@
+#ifndef MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_
+#define MXNET_OPERATOR_CONTRIB_TENSORRT_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt-inl.h
+ * \brief TensorRT Operator
+ * \author Marek Kolodziej, Clement Fuji Tsang
+*/
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../operator_common.h"
+#include "../../common/utils.h"
+#include "../../common/serialization.h"
+#include "../../executor/exec_pass.h"
+#include "../../executor/graph_executor.h"
+#include "../../executor/onnx_to_tensorrt.h"
+
+namespace mxnet {
+namespace op {
+
+using namespace nnvm;
+using namespace ::onnx;
+using int64 = ::google::protobuf::int64;
+
+namespace tensorrt {
+  enum class TypeIO { Inputs = 0, Outputs = 1 };
+  using NameToIdx_t = std::map;
+  using InferenceTuple_t = std::tuple;
+  using InferenceMap_t = std::map;
+}  // namespace tensorrt
+
+using trt_name_to_idx = std::map;
+
+struct TRTParam : public dmlc::Parameter<TRTParam> {
+  std::string serialized_onnx_graph;
+  std::string serialized_input_map;
+  std::string serialized_output_map;
+  tensorrt::NameToIdx_t input_map;
+  tensorrt::InferenceMap_t output_map;
+  ::onnx::ModelProto onnx_pb_graph;
+
+  TRTParam() {}
+
+  TRTParam(const ::onnx::ModelProto& onnx_graph,
+           const tensorrt::InferenceMap_t& input_map,
+           const tensorrt::NameToIdx_t& output_map) {
+    common::Serialize(input_map, &serialized_input_map);
+    common::Serialize(output_map, &serialized_output_map);
+    onnx_graph.SerializeToString(&serialized_onnx_graph);
+  }
+
+  DMLC_DECLARE_PARAMETER(TRTParam) {
+    DMLC_DECLARE_FIELD(serialized_onnx_graph)
+    .describe("Serialized ONNX graph");
+    DMLC_DECLARE_FIELD(serialized_input_map)
+    .describe("Map from inputs to topological order as input.");
+    DMLC_DECLARE_FIELD(serialized_output_map)
+    .describe("Map from outputs to order in g.outputs.");
+  }
+};
+
+struct TRTEngineParam {
+  nvinfer1::IExecutionContext* trt_executor;
+  std::vector<std::pair<uint32_t, tensorrt::TypeIO> > binding_map;
+};
+
+OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx,
+                          const std::vector<TShape>& ishape,
+                          const std::vector<int>& itype);
+
+template<typename xpu>
+void TRTCompute(const OpStatePtr& state, const OpContext& ctx,
 
 Review comment:
   @reminisce @KellenSunderland Makes sense, thanks!
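
For readers skimming the thread: a minimal sketch, assuming MXNet's usual stateful-operator attributes (`FCreateOpState`, `FStatefulCompute`) and a hypothetical operator name `_trt_op`, of how a create/compute pair like the one quoted above is typically registered. This is illustrative, not the PR's actual registration code:

```
// Hypothetical registration; the attribute names follow MXNet's
// stateful-op convention, and the op name "_trt_op" is made up here.
NNVM_REGISTER_OP(_trt_op)
.set_attr<FCreateOpState>("FCreateOpState", TRTCreateState)
.set_attr<FStatefulCompute>("FStatefulCompute<gpu>", TRTCompute<gpu>);
```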




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-26 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r198306248
 
 

 ##
 File path: include/mxnet/executor.h
 ##
 @@ -152,19 +152,19 @@ class Executor {
   static Executor* SimpleBind(nnvm::Symbol symbol,
 
 Review comment:
   Thanks @piiswrong !




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-26 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r198281148
 
 

 ##
 File path: src/common/serialization.h
 ##
 @@ -0,0 +1,526 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file serialization.h
+ * \brief Serialization of some STL and nnvm data-structures
+ * \author Clement Fuji Tsang
+ */
+
+#ifndef MXNET_COMMON_SERIALIZATION_H_
+#define MXNET_COMMON_SERIALIZATION_H_
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+namespace mxnet {
+namespace common {
+
+template<typename T>
+inline size_t serialized_size(const T& obj);
+
+template<typename T>
+inline size_t serialized_size(const nnvm::Tuple<T>& obj);
+
+template<typename T>
+inline size_t serialized_size(const std::vector<T>& obj);
+
+template<typename K, typename V>
+inline size_t serialized_size(const std::pair<K, V>& obj);
+
+template<typename K, typename V>
+inline size_t serialized_size(const std::map<K, V>& obj);
+
+template<typename K, typename V>
+inline size_t serialized_size(const std::unordered_map<K, V>& obj);
+
+template<typename T>
+inline size_t serialized_size(const std::set<T>& obj);
+
+template<typename T>
+inline size_t serialized_size(const std::unordered_set<T>& obj);
+
+template<>
+inline size_t serialized_size(const std::string& obj);
+
+template<typename... Args>
+inline size_t serialized_size(const std::tuple<Args...>& obj);
+
+template<typename T>
+inline void serialize(const T& obj, char** buffer);
+
+template<typename T>
+inline void serialize(const nnvm::Tuple<T>& obj, char** buffer);
+
+template<typename T>
+inline void serialize(const std::vector<T>& obj, char** buffer);
+
+template<typename K, typename V>
+inline void serialize(const std::pair<K, V>& obj, char** buffer);
+
+template<typename K, typename V>
+inline void serialize(const std::map<K, V>& obj, char** buffer);
+
+template<typename K, typename V>
+inline void serialize(const std::unordered_map<K, V>& obj, char** buffer);
+
+template<typename T>
+inline void serialize(const std::set<T>& obj, char** buffer);
+
+template<typename T>
+inline void serialize(const std::unordered_set<T>& obj, char** buffer);
+
+template<>
+inline void serialize(const std::string& obj, char** buffer);
+
+template<typename... Args>
+inline void serialize(const std::tuple<Args...>& obj, char** buffer);
+
+template<typename T>
+inline void deserialize(T* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename T>
+inline void deserialize(nnvm::Tuple<T>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename T>
+inline void deserialize(std::vector<T>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename K, typename V>
+inline void deserialize(std::pair<K, V>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename K, typename V>
+inline void deserialize(std::map<K, V>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename K, typename V>
+inline void deserialize(std::unordered_map<K, V>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename T>
+inline void deserialize(std::set<T>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename T>
+inline void deserialize(std::unordered_set<T>* obj, const std::string& buffer, size_t* curr_pos);
+
+template<>
+inline void deserialize(std::string* obj, const std::string& buffer, size_t* curr_pos);
+
+template<typename... Args>
+inline void deserialize(std::tuple<Args...>* obj, const std::string& buffer, size_t* curr_pos);
+
+
+template<typename T>
+struct is_cont {
+  static const bool value = !std::is_pod<T>::value;
+};
+
+template<typename T>
+inline size_t serialized_size(const T& obj) {
+  return sizeof(T);
+}
+
+template<typename T>
+inline size_t serialized_size(const nnvm::Tuple<T>& obj) {
+  if (is_cont<T>::value) {
+    size_t sum_val = 4;
+    for (auto& el : obj) {
+      sum_val += serialized_size(el);
+    }
+    return sum_val;
+  } else {
+    return 4 + (obj.ndim() * sizeof(T));
+  }
+}
+
+template<typename T>
+inline size_t serialized_size(const std::vector<T>& obj) {
+  if (is_cont<T>::value) {
+    size_t sum_val = 4;
+    for (T i : obj) {
+      sum_val += serialized_size(i);
+    }
+    return sum_val;
+  } else {
+    return sizeof(T) * obj.size() + 4;
+  }
+}
+
+template<typename K, typename V>
+inline size_t serialized_size(const std::pair<K, V>& obj) {
+  return serialized_size(obj.first) + serialized_size(obj.second);
+}
+
+template<typename K, typename V>
+inline size_t serialized_size(const std::map<K, V>& obj) {
+  size_t sum_val = 4;
+  if (is_cont<K>::value && is_cont<V>::value) {
+    for (auto p : obj) {
+      sum_val += 

[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197972947
 
 

 ##
 File path: Makefile
 ##
 @@ -94,6 +94,14 @@ else
 endif
 CFLAGS += -I$(TPARTYDIR)/mshadow/ -I$(TPARTYDIR)/dmlc-core/include -fPIC 
-I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(TPARTYDIR)/tvm/include 
-Iinclude $(MSHADOW_CFLAGS)
 LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
+
+
+ifeq ($(USE_TENSORRT), 1)
 
 Review comment:
   @KellenSunderland I agree. Should the CMake build be part of the initial PR 
or a subsequent one?




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197972245
 
 

 ##
 File path: src/executor/tensorrt_pass.cc
 ##
 @@ -0,0 +1,583 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt_pass.cc
+ * \brief Replace TRT compatible subgraphs by TRT engines
+ * \author Clement Fuji Tsang
+ */
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "./onnx_to_tensorrt.h"
+#include "./exec_pass.h"
+#include "../operator/contrib/nnvm_to_onnx-inl.h"
+
+namespace mxnet {
+namespace exec {
+
+using NodePtr = nnvm::NodePtr;
+
+/*!
+ * \brief Custom graph class, which will contain bi-directional nodes
+ * we need to compute DFS and reverse DFS for graph partitioning
+ */
+class BidirectionalGraph {
+ public:
+  struct Node {
+    nnvm::Node* nnvmptr;
+    std::vector<Node*> inputs;
+    std::vector<Node*> outputs;
+  };
+  std::vector<Node> nodes;
+  std::unordered_map<nnvm::Node*, uint32_t> nnvm2nid;
+  std::vector<Node*> outputs;
+  static const std::unordered_set<std::string> unconditionalTRTop;
+
+  explicit BidirectionalGraph(const Graph& g) {
+    auto& idx = g.indexed_graph();
+    auto num_nodes = idx.num_nodes();
+    nodes.reserve(num_nodes);
+    nnvm2nid.reserve(num_nodes);
+    outputs.reserve(idx.outputs().size());
+    DFSVisit(g.outputs, [this](const nnvm::NodePtr& n) {
+      BidirectionalGraph::Node new_node;
+      new_node.nnvmptr = n.get();
+      nnvm2nid[n.get()] = static_cast<uint32_t>(nodes.size());
+      nodes.emplace_back(std::move(new_node));
+    });
+    for (const auto& it : nnvm2nid) {
+      nnvm::Node* nnvmnode = it.first;
+      uint32_t nid = it.second;
+      for (auto& n : nnvmnode->inputs) {
+        uint32_t input_nid = nnvm2nid[n.node.get()];
+        nodes[input_nid].outputs.emplace_back(&nodes[nid]);
+        nodes[nid].inputs.emplace_back(&nodes[input_nid]);
+      }
+    }
+    for (auto& e : g.outputs) {
+      uint32_t nid = nnvm2nid[e.node.get()];
+      outputs.emplace_back(&nodes[nid]);
+    }
+  }
+
+  template <typename FVisit>
+  void DFS(const std::vector<Node*>& heads, bool reverse, FVisit fvisit) {
+    std::unordered_set<Node*> visited;
+    std::deque<Node*> stack(heads.begin(), heads.end());
+    visited.reserve(heads.size());
+    while (!stack.empty()) {
+      Node* vertex = stack.back();
+      stack.pop_back();
+      if (visited.count(vertex) == 0) {
+        visited.insert(vertex);
+        fvisit(vertex);
+        std::vector<Node*> nexts = reverse ? vertex->inputs : vertex->outputs;
+        for (Node* node : nexts) {
+          if (visited.count(node) == 0) {
+            stack.emplace_back(node);
+          }
+        }
+      }
+    }
+  }
+
+  using t_pairset = std::pair<std::unordered_set<Node*>, std::unordered_set<Node*>>;
+  using t_pairvec = std::pair<std::vector<Node*>, std::vector<Node*>>;
+  using t_uncomp_map = std::unordered_map<Node*, std::vector<Node*>>;
+
+  std::unordered_set<Node*> naive_grow_subgraph(Node* head,
+                                                std::unordered_set<Node*>* set_unused,
+                                                t_uncomp_map* uncomp_map) {
+    std::unordered_set<Node*> subgraph;
+    std::unordered_set<Node*> uncomp_set;
+    std::deque<Node*> stack;
+    stack.emplace_back(head);
+    while (!stack.empty()) {
+      Node* vertex = stack.back();
+      stack.pop_back();
+      if (set_unused->count(vertex) && !uncomp_set.count(vertex)) {
+        set_unused->erase(vertex);
+        subgraph.insert(vertex);
+        uncomp_set.insert((*uncomp_map)[vertex].begin(), (*uncomp_map)[vertex].end());
+        for (Node* input : vertex->inputs) {
+          if (set_unused->count(input) && !uncomp_set.count(input)) {
+            stack.emplace_back(input);
+          }
+        }
+        for (Node* output : vertex->outputs) {
+          if (set_unused->count(output) && !uncomp_set.count(output)) {
+            stack.emplace_back(output);
+          }
+        }
+      }
+    }
+    return subgraph;
+  }
+
+  std::vector<std::unordered_set<Node*>> get_subsets(
+    std::unordered_map<std::string, NDArray>* const params_map) {
+    std::vector<std::unordered_set<Node*>> subgraphs;
+    std::unordered_set<Node*> set_nonTRTnodes;
+    std::unordered_set<Node*> set_allnodes(nodes.size());
+    std::vector<t_pairset> separation_sets;
+    for (Node& node : nodes) 
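
For intuition about the class quoted above: a minimal sketch, under the assumption that `g` is an `nnvm::Graph`, of driving the `DFS` helper in reverse (following input edges from the graph outputs visits every node the outputs depend on). Hypothetical caller code, not part of the PR:

```
BidirectionalGraph bi_graph(g);
// reverse = true walks vertex->inputs, i.e. from the outputs toward producers
bi_graph.DFS(bi_graph.outputs, /*reverse=*/true,
             [](BidirectionalGraph::Node* n) {
  LOG(INFO) << n->nnvmptr->attrs.name;  // visit order: outputs first
});
```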

[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197972135
 
 

 ##
 File path: src/executor/tensorrt_pass.cc
 ##

[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197971193
 
 

 ##
 File path: src/executor/tensorrt_pass.cc
 ##

[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197969467
 
 

 ##
 File path: include/mxnet/executor.h
 ##
 @@ -152,19 +152,19 @@ class Executor {
   static Executor* SimpleBind(nnvm::Symbol symbol,
                               const Context& default_ctx,
                               const std::map<std::string, Context>& group2ctx,
-                              const std::vector<Context>& in_arg_ctxes,
-                              const std::vector<Context>& arg_grad_ctxes,
-                              const std::vector<Context>& aux_state_ctxes,
-                              const std::unordered_map<std::string, TShape>& arg_shape_map,
-                              const std::unordered_map<std::string, int>& arg_dtype_map,
-                              const std::unordered_map<std::string, int>& arg_stype_map,
-                              const std::vector<OpReqType>& grad_req_types,
-                              const std::unordered_set<std::string>& param_names,
+                              std::vector<Context>* in_arg_ctxes,
 
 Review comment:
  @reminisce Because if things are to be mutated, they need to be passed as 
pointers, not non-const references (per the linter rules). Given your earlier 
comments about adding SimpleBindEx rather than modifying SimpleBind, this will 
be addressed there instead.
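
To illustrate the linter rule being referenced, a generic sketch (not the PR's code):

```
// Disallowed by the style checker: a non-const reference hides the
// mutation at the call site.
void Prepare(std::vector<Context>& in_arg_ctxes);

// Preferred: a pointer makes the mutation visible where it happens.
void Prepare(std::vector<Context>* in_arg_ctxes);

Prepare(&in_arg_ctxes);  // the '&' signals the argument may be modified
```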




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197969124
 
 

 ##
 File path: include/mxnet/executor.h
 ##
 @@ -152,19 +152,19 @@ class Executor {
   static Executor* SimpleBind(nnvm::Symbol symbol,
                               const Context& default_ctx,
                               const std::map<std::string, Context>& group2ctx,
-                              const std::vector<Context>& in_arg_ctxes,
-                              const std::vector<Context>& arg_grad_ctxes,
-                              const std::vector<Context>& aux_state_ctxes,
-                              const std::unordered_map<std::string, TShape>& arg_shape_map,
-                              const std::unordered_map<std::string, int>& arg_dtype_map,
-                              const std::unordered_map<std::string, int>& arg_stype_map,
-                              const std::vector<OpReqType>& grad_req_types,
-                              const std::unordered_set<std::string>& param_names,
+                              std::vector<Context>* in_arg_ctxes,
+                              std::vector<Context>* arg_grad_ctxes,
+                              std::vector<Context>* aux_state_ctxes,
+                              std::unordered_map<std::string, TShape>* arg_shape_map,
+                              std::unordered_map<std::string, int>* arg_dtype_map,
+                              std::unordered_map<std::string, int>* arg_stype_map,
+                              std::vector<OpReqType>* grad_req_types,
+                              std::unordered_set<std::string>* param_names,
                               std::vector<NDArray>* in_args,
                               std::vector<NDArray>* arg_grads,
                               std::vector<NDArray>* aux_states,
                               std::unordered_map<std::string, NDArray>*
-                                shared_data_arrays = nullptr,
+                              shared_data_arrays = nullptr,
 
 Review comment:
   OK




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197967101
 
 

 ##
 File path: src/common/serialization.h
 ##

[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197966899
 
 

 ##
 File path: docs/api/python/contrib/tensorrt.md
 ##
 @@ -0,0 +1,117 @@
+# MxNet-TensorRT Runtime Integration
+## What is this?
+
+This document describes how to use the [MxNet](http://mxnet.incubator.apache.org/)-[TensorRT](https://developer.nvidia.com/tensorrt) runtime integration to accelerate model inference.
+
+## Why is TensorRT integration useful? 
+
+TensorRT can greatly speed up inference of deep learning models. One experiment on a Titan V (V100) GPU shows that with MxNet 1.2, inference of the ResNet-50 model on the CIFAR-10 dataset in single precision (fp32) runs approximately 3x faster. As batch sizes and image sizes grow (for CNN inference), the benefit may shrink, but in general TensorRT helps most in cases with:
+- many bandwidth-bound layers (e.g. pointwise operations) that benefit from 
GPU kernel fusion
+- inference use cases which have tight latency requirements and where the 
client application can't wait for large batches to be queued up
+- embedded systems, where memory constraints are tighter than on servers
+- when performing inference in reduced precision, especially for integer (e.g. 
int8) inference. 
+
+In the past, the main hindrance for a user wishing to benefit from TensorRT was that the model first needed to be exported from the framework. Once the model was exported through some means (an NNVM-to-TensorRT graph rewrite, ONNX, etc.), one then had to write a TensorRT client application to feed the data into the TensorRT engine. Since at that point the model was independent of the original framework, and since TensorRT could only compute the neural network layers while the user had to bring their own data pipeline, this increased the burden on the user and reduced reproducibility (e.g. different frameworks may have slightly different data pipelines, or differ in the flexibility of data pipeline operation ordering). Moreover, since frameworks typically support more operators than TensorRT does, one might have to resort to TensorRT plugins for operations that aren't already available via the TensorRT graph API.
+
+The current experimental runtime integration of TensorRT with MxNet resolves 
the above concerns by ensuring that:
+- the graph is still executed by MxNet
+- the MxNet data pipeline is preserved
+- the TensorRT runtime integration logic partitions the graph into subgraphs 
that are either TensorRT compatible or incompatible
+- the graph partitioner collects the TensorRT-compatible subgraphs, hands them 
over to TensorRT, and substitutes the TensorRT compatible subgraph with a 
TensorRT library call, represented as a TensorRT node in NNVM.
+- if a node is not TensorRT compatible, it won't be extracted and substituted 
with a TensorRT call, and will still execute within MxNet
+
+The above points strike a compromise between the flexibility of MxNet and fast inference in TensorRT, without requiring the user to learn the TensorRT APIs or to write their own client application and data pipeline.
+
+## How do I build MxNet with TensorRT integration?
+
+Building MxNet together with TensorRT is somewhat complex. The recipe will hopefully be simplified in the near future, but for now it's easiest to build a Docker container with an Ubuntu 16.04 base. The Dockerfile can be found under the ci subdirectory of the MxNet repository. You can build the container as follows:
+
+```
+docker build -f ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt -t mxnet_with_tensorrt .
+```
+
+Next, we can run this container as follows (don't forget to install 
[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)):
+
+```no-highlight
+nvidia-docker run -ti --rm mxnet_with_tensorrt
+```
+
+After starting the container, you will find yourself in the /opt/mxnet 
directory by default.
+
+## Running a "hello, world" model / unit test:
+
+You can then run the LeNet-5 unit test, which trains LeNet-5 on MNIST and then runs inference both in MxNet and through the MxNet-TensorRT runtime integration, comparing the results. The test can be run as follows:
+
+```no-highlight
+python tests/python/tensorrt/test_tensorrt_lenet5.py
+```
+
+You should get a result similar to the following:
+
+```no-highlight
+Running inference in MxNet
+[03:31:18] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:107: Running 
performance tests to find the best convolution algorithm, this can take a 
while... (setting env variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
+Running inference in MxNet-TensorRT
+[03:31:18] src/operator/contrib/nnvm_to_onnx.cc:152: ONNX graph construction 
complete.
+Building TensorRT engine, FP16 available:1
+Max batch size: 1024
+Max workspace size: 1024 

[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197966796
 
 

 ##
 File path: src/operator/contrib/nnvm_to_onnx-inl.h
 ##
 @@ -0,0 +1,156 @@
+#ifndef MXNET_OPERATOR_CONTRIB_NNVM_TO_ONNX_INL_H_
+#define MXNET_OPERATOR_CONTRIB_NNVM_TO_ONNX_INL_H_
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file tensorrt-inl.h
+ * \brief TensorRT Operator
+ * \author Marek Kolodziej, Clement Fuji Tsang
+*/
+
+#if MXNET_USE_TENSORRT
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "./tensorrt-inl.h"
+#include "../operator_common.h"
+#include "../../common/utils.h"
+#include "../../common/serialization.h"
+
+namespace mxnet {
+namespace op {
+namespace nnvm_to_onnx {
+
+using namespace nnvm;
+using namespace ::onnx;
+using int64 = ::google::protobuf::int64;
+
+std::unordered_map<std::string, TShape> GetPlaceholderShapes(
+    const ShapeVector& shape_inputs,
+    const nnvm::IndexedGraph& ig);
+
+std::unordered_map<std::string, uint32_t> GetOutputLookup(
+    const nnvm::IndexedGraph& ig);
+
+void ConvertPlaceholder(
+  const std::string& node_name,
+  const std::unordered_map<std::string, TShape>& placeholder_shapes,
+  GraphProto* const graph_proto);
+
+void ConvertConstant(GraphProto* const graph_proto,
+  const std::string& node_name,
+  std::unordered_map<std::string, NDArray>* const shared_buffer);
+
+void ConvertOutput(op::tensorrt::InferenceMap_t* const trt_output_map,
+   GraphProto* const graph_proto,
+   const std::unordered_map<std::string, uint32_t>::iterator& out_iter,
+   const std::string& node_name,
+   const nnvm::Graph& g,
+   const StorageTypeVector& storage_types,
+   const DTypeVector& dtypes);
+
+typedef void (*ConverterFunction)(NodeProto* node_proto,
+                                  const NodeAttrs& attrs,
+                                  const nnvm::IndexedGraph& ig,
+                                  const array_view<IndexedGraph::NodeEntry>& inputs);
+
+
+// Forward declarations
+void ConvertConvolution(
+    NodeProto* node_proto,
+    const NodeAttrs& attrs,
+    const nnvm::IndexedGraph& ig,
+    const array_view<IndexedGraph::NodeEntry>& inputs);
+
+
+void ConvertPooling(NodeProto* node_proto,
+                    const NodeAttrs& attrs,
+                    const nnvm::IndexedGraph& ig,
+                    const array_view<IndexedGraph::NodeEntry>& inputs);
+
+void ConvertActivation(NodeProto* node_proto,
+                       const NodeAttrs& attrs,
+                       const nnvm::IndexedGraph& ig,
+                       const array_view<IndexedGraph::NodeEntry>& inputs);
+
+void ConvertFullyConnected(NodeProto* node_proto,
+                           const NodeAttrs& attrs,
+                           const nnvm::IndexedGraph& ig,
+                           const array_view<IndexedGraph::NodeEntry>& inputs);
+
+void ConvertSoftmaxOutput(NodeProto* node_proto,
+                          const NodeAttrs& attrs,
+                          const nnvm::IndexedGraph& ig,
+                          const array_view<IndexedGraph::NodeEntry>& inputs);
+
+void ConvertFlatten(NodeProto* node_proto,
+                    const NodeAttrs& attrs,
+                    const nnvm::IndexedGraph& ig,
+                    const array_view<IndexedGraph::NodeEntry>& inputs);
+
+void ConvertBatchNorm(NodeProto* node_proto,
+                      const NodeAttrs& attrs,
+                      const nnvm::IndexedGraph& ig,
+                      const array_view<IndexedGraph::NodeEntry>& inputs);
+
+void ConvertElementwiseAdd(NodeProto* node_proto,
+                           const NodeAttrs& attrs,
+                           const nnvm::IndexedGraph& ig,
+                           const array_view<IndexedGraph::NodeEntry>& inputs);
+
+TRTParam ConvertNnvmGraphToOnnx(
+    const nnvm::Graph& g,
+    std::unordered_map<std::string, NDArray>* const shared_buffer);
+
+static const std::unordered_map<std::string, ConverterFunction> converter_map = {
 
 Review comment:
   @eric-haibin-lin Yes, so far. TensorRT supports more operators, so the list 
will be expanded once the initial integration is in place.
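
As a sketch of what such an expansion could look like, assuming a converter for a currently unsupported op (the `Concat` converter below is hypothetical, not part of this PR):

```
// Hypothetical future converter, following the ConverterFunction signature:
void ConvertConcat(NodeProto* node_proto,
                   const NodeAttrs& attrs,
                   const nnvm::IndexedGraph& ig,
                   const array_view<IndexedGraph::NodeEntry>& inputs);

static const std::unordered_map<std::string, ConverterFunction> converter_map = {
  {"Convolution", ConvertConvolution},
  {"Pooling", ConvertPooling},
  {"Concat", ConvertConcat},  // hypothetical addition
  // ... remaining converters as in the PR ...
};
```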



[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197966636
 
 

 ##
 File path: src/common/serialization.h
 ##
 @@ -0,0 +1,526 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file serialization.h
+ * \brief Serialization of some STL and nnvm data-structures
+ * \author Clement Fuji Tsang
+ */
+
+#ifndef MXNET_COMMON_SERIALIZATION_H_
+#define MXNET_COMMON_SERIALIZATION_H_
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+namespace mxnet {
+namespace common {
+
+template<typename T>
+inline size_t serialized_size(const T& obj);
 
 Review comment:
   @eric-haibin-lin It would make sense to increase test coverage for this 
independently. Will add it to the to-do list for polishing up the PR.
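
A minimal round-trip check along those lines, assuming the `common::Serialize` wrapper used by TRTParam and a matching `common::Deserialize` (a sketch of a possible test, not the eventual one):

```
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

void TestMapRoundTrip() {
  std::map<std::string, int32_t> original = {{"data", 0}, {"softmax_label", 1}};
  std::string buffer;
  mxnet::common::Serialize(original, &buffer);

  std::map<std::string, int32_t> restored;
  mxnet::common::Deserialize(&restored, buffer);
  assert(original == restored);  // the round trip must be lossless
}
```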




[GitHub] mkolod commented on a change in pull request #11325: Added TensorRT runtime integration

2018-06-25 Thread GitBox
mkolod commented on a change in pull request #11325: Added TensorRT runtime 
integration
URL: https://github.com/apache/incubator-mxnet/pull/11325#discussion_r197966380
 
 

 ##
 File path: python/mxnet/cuda_utils.py
 ##
 @@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Copyright (c) 2015 by Contributors
+# File: cuda_utils.py
+# Purpose: Functions to query GPU count, arch, etc.
+# Author: Dick Carter
+
+"""Provides information on the visible CUDA GPUs on the system."""
+# pylint: disable=broad-except
+# As a stand-alone program, it prints a list of unique cuda SM architectures
+import ctypes as C
+from ctypes.util import find_library
+
+def cint(init_val=0):
 
 Review comment:
   @eric-haibin-lin Good point, the Ctypes utils could just be moved to base, 
and then reused in cuda_utils.

