handoku opened a new issue #18231:
URL: https://github.com/apache/incubator-mxnet/issues/18231


   ## Description
   I was using the NGC Docker image mxnet-19.12 and trying to use TensorRT to 
speed up GluonCV YOLOv3, following [Optimizing Deep Learning Computation Graphs 
with 
TensorRT](http://mxnet.incubator.apache.org/api/python/docs/tutorials/performance/backend/tensorrt/tensorrt.html).
   
   I replaced the `resnet-18` model with `yolo3_darknet53_coco`, but the 
program crashed while building the TensorRT engine. Its output said:
   `Found a cycle when BFS from node darknetv30_darknetbasicblockv31__plus0`
   
   ### Error Message
   ```
   Building TensorRT engine
   [14:03:02] src/operator/subgraph/build_subgraph.cc:691: start to execute 
TensorRT.
   [14:03:02] src/operator/subgraph/build_subgraph.cc:300: Found a cycle when 
BFS from node darknetv30_darknetbasicblockv31__plus0. Excluding nodes 
darknetv30_darknetbasicblockv32__plus0, and retrying
   Traceback (most recent call last):
     File "./yolo3_trt.py", line 43, in <module>
       trt_sym = sym.get_backend_symbol('TensorRT')
     File "/opt/mxnet/python/mxnet/symbol/symbol.py", line 2564, in 
get_backend_symbol
       check_call(_LIB.MXGenBackendSubgraph(self.handle, c_str(backend), 
ctypes.byref(out)))
     File "/opt/mxnet/python/mxnet/base.py", line 252, in check_call
       raise MXNetError(py_str(_LIB.MXGetLastError()))
   mxnet.base.MXNetError: [14:03:02] 
src/operator/subgraph/build_subgraph.cc:209: Check failed: count < 
indexed_graph.num_nodes() (727 vs. 727) : Finding ancestor failed. There is 
probably a loop in the graph
   Stack trace:
     [bt] (0) 
/usr/local/lib/libmxnet.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x43) 
[0x7fc82c58dd33]
     [bt] (1) 
/usr/local/lib/libmxnet.so(mxnet::op::sg::LabelSubgraph(nnvm::Graph const&, 
std::shared_ptr<mxnet::op::SubgraphSelectorV2>, int, unsigned long, 
std::vector<std::shared_ptr<mxnet::op::BiDirectedNode>, 
std::allocator<std::shared_ptr<mxnet::op::BiDirectedNode> > > const&, 
std::vector<mxnet::op::BiDirectedNode*, 
std::allocator<mxnet::op::BiDirectedNode*> >*, 
std::unordered_set<mxnet::op::BiDirectedNode const*, 
std::hash<mxnet::op::BiDirectedNode const*>, 
std::equal_to<mxnet::op::BiDirectedNode const*>, 
std::allocator<mxnet::op::BiDirectedNode const*> >*)+0x17a0) [0x7fc82e384010]
     [bt] (2) 
/usr/local/lib/libmxnet.so(mxnet::op::sg::PreSelectSubgraphNodes(nnvm::Graph 
const&, std::shared_ptr<mxnet::op::SubgraphSelectorV2>, int, unsigned long, 
std::vector<std::shared_ptr<mxnet::op::BiDirectedNode>, 
std::allocator<std::shared_ptr<mxnet::op::BiDirectedNode> > > const&, 
std::vector<mxnet::op::BiDirectedNode*, 
std::allocator<mxnet::op::BiDirectedNode*> >*)+0x167) [0x7fc82e385517]
     [bt] (3) 
/usr/local/lib/libmxnet.so(mxnet::op::sg::SelectSubgraphNodes(nnvm::Graph*, 
std::shared_ptr<mxnet::op::SubgraphSelectorV2>, 
std::vector<std::shared_ptr<mxnet::op::BiDirectedNode>, 
std::allocator<std::shared_ptr<mxnet::op::BiDirectedNode> > > const&, 
std::vector<std::vector<mxnet::op::BiDirectedNode*, 
std::allocator<mxnet::op::BiDirectedNode*> >, 
std::allocator<std::vector<mxnet::op::BiDirectedNode*, 
std::allocator<mxnet::op::BiDirectedNode*> > > >*, 
std::vector<std::shared_ptr<mxnet::op::SubgraphSelectorV2>, 
std::allocator<std::shared_ptr<mxnet::op::SubgraphSelectorV2> > >*, 
mxnet::op::BiDirectedNode const*, unsigned long, unsigned long*)+0x10f) 
[0x7fc82e385c4f]
     [bt] (4) 
/usr/local/lib/libmxnet.so(mxnet::op::sg::FindSubgraphs(nnvm::Graph*, 
mxnet::op::SubgraphProperty const&, 
std::vector<std::shared_ptr<mxnet::op::BiDirectedNode>, 
std::allocator<std::shared_ptr<mxnet::op::BiDirectedNode> > > const&, 
std::vector<std::vector<mxnet::op::BiDirectedNode*, 
std::allocator<mxnet::op::BiDirectedNode*> >, 
std::allocator<std::vector<mxnet::op::BiDirectedNode*, 
std::allocator<mxnet::op::BiDirectedNode*> > > >*, 
std::vector<std::shared_ptr<mxnet::op::SubgraphSelectorV2>, 
std::allocator<std::shared_ptr<mxnet::op::SubgraphSelectorV2> > >*)+0x317) 
[0x7fc82e386897]
     [bt] (5) 
/usr/local/lib/libmxnet.so(mxnet::op::BuildSubgraph(nnvm::Graph&&)+0x482) 
[0x7fc82e388872]
     [bt] (6) /usr/local/lib/libmxnet.so(std::_Function_handler<nnvm::Graph 
(nnvm::Graph), nnvm::Graph (*)(nnvm::Graph&&)>::_M_invoke(std::_Any_data 
const&, nnvm::Graph&&)+0x20) [0x7fc82c8d7940]
     [bt] (7) /usr/local/lib/libmxnet.so(nnvm::ApplyPasses(nnvm::Graph, 
std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > > > const&)+0x1171) 
[0x7fc831f255a1]
     [bt] (8) /usr/local/lib/libmxnet.so(nnvm::ApplyPass(nnvm::Graph, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)+0xbe) [0x7fc82ecdd6ee]
   ```
   
   ## To Reproduce
   (If you developed your own code, please provide a short script that 
reproduces the error. For existing examples, please provide link.)
   
   ### Steps to reproduce
   (Paste the commands you ran that produced the error.)
   
   1. Prepare the script `yolo3_trt.py`:
   ```
   from gluoncv import model_zoo, data, utils
   from matplotlib import pyplot as plt
   from gluoncv.utils import export_block
   import mxnet as mx
   from mxnet.contrib import onnx as onnx_mxnet
   import numpy as np
   import time
   
   OUTPUT = "./"
   DATA = "./cat.png"
   SIZE = 320
   MODEL = "yolo3_darknet53_coco"
   INPUT_SHAPE = (1, 3, SIZE, SIZE)
   net = model_zoo.get_model('yolo3_darknet53_coco', pretrained=True)
   net.hybridize()
   x, img = data.transforms.presets.yolo.load_test("./cat.png", short=SIZE)
   class_IDs, scores, bounding_boxs = net(x)
   net.export("yolo3_darknet53_coco")
   
   
   
   sym, arg_params, aux_params = mx.model.load_checkpoint(MODEL, 0)
   
   # Create sample input
   batch_shape = INPUT_SHAPE
   input = mx.nd.zeros(batch_shape)
   
   
   print('Building TensorRT engine')
   trt_sym = sym.get_backend_symbol('TensorRT')
   arg_params, aux_params = mx.contrib.tensorrt.init_tensorrt_params(trt_sym, 
arg_params, aux_params)
   mx.contrib.tensorrt.set_use_fp16(True)
   executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape,
                                  grad_req='null', force_rebind=True)
   executor.copy_params_from(arg_params, aux_params)
   
   #Warmup
   print('Warming up TensorRT')
   for i in range(0, 10):
       y_gen = executor.forward(is_train=False, data=input)
       y_gen[0].wait_to_read()
   
   # Timing
   print('Starting TensorRT timed run')
   start = time.process_time()
   for i in range(0, 300):
       y_gen = executor.forward(is_train=False, data=input)
       y_gen[0].wait_to_read()
   end = time.time()
   print(time.process_time() - start)
   ```
   2. Run `python ./yolo3_trt.py`
   
   ## What have you tried to solve it?
   
   1. Tried different versions of MXNet.
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to