tiandiao123 commented on a change in pull request #8808:
URL: https://github.com/apache/tvm/pull/8808#discussion_r697015121
##########
File path: src/runtime/contrib/tensorrt/tensorrt_runtime.cc
##########
@@ -267,13 +320,68 @@ class TensorRTRuntime : public JSONRuntimeBase {
}
// Build engine.
- trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] =
builder.BuildEngine();
- DLOG(INFO) << "Finished building TensorRT engine for subgraph " <<
symbol_name_
+ // trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] =
builder.BuildEngine();
+ const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0);
+ TensorRTEngineAndContext engine_and_context = builder.BuildEngine();
+ trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] =
engine_and_context;
+ if(use_int8 == true){
+ if(calibrator_ == nullptr){
+ this->CreateCalibratorIfUsingInt8(engine_and_context);
+ }
+
+ if(num_calibration_batches_remaining_ == 0){
+ engine_and_context.context->destroy();
+ engine_and_context.engine->destroy();
+
+ LOG(INFO)<<"rebuild builder using int8 mode";
+ TensorRTBuilder builder2(&logger_, data_entry_, max_workspace_size_,
use_implicit_batch_,
+ use_fp16, batch_size, calibrator_.get());
+ set_up_input_output(builder2);
+ TensorRTEngineAndContext new_engine_and_context =
builder2.BuildEngine();
+ trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] =
new_engine_and_context;
+ calibrator_.reset(nullptr);
+ LOG(INFO) <<"finished rebuilding using int8 mode ... ";
+ }
+
+ }
+
+ LOG(INFO) << "Finished building TensorRT engine for subgraph " <<
symbol_name_
<< " with batch size " << batch_size;
CacheEngineToDisk();
return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size));
}
+
+ void set_up_input_output(TensorRTBuilder& builder){
Review comment:
> Let's move all of the build-engine functionality here and rename it to
`BuildEngineFromJson()`. Currently this code duplicates what is already in
`GetOrBuildEngine`. We can pass a flag if we want `BuildEngineFromJson()` to
consume the calibrator and build the int8 engine.
>
> Then we can just call `BuildEngineFromJson()` from `GetOrBuildEngine` when
we need to build a new engine.
OK, that makes sense to me.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]