pengzhao-intel commented on a change in pull request #14713: MKLDNN RNN
Inference Integration (fp32 LSTM and vRNN with tanh and relu)
URL: https://github.com/apache/incubator-mxnet/pull/14713#discussion_r286986672
##########
File path: src/operator/rnn-inl.h
##########
@@ -880,23 +902,78 @@ class RNNOp {
param_.p,
param_.mode);
} else {
- RNNForwardInference<DType>(work_cpu_space,
- param_.state_outputs,
- param_.num_layers,
- direction,
- param_.seq_length_,
- param_.batch_size_,
- param_.input_size_,
- param_.state_size,
- x.dptr_,
- hx.dptr_,
- cx_ptr,
- w.dptr_,
- b_ptr,
- y.dptr_,
- hy_ptr,
- cy_ptr,
- param_.mode);
+ #if MXNET_USE_MKLDNN == 1
+ if (dmlc::GetEnv("MXNET_USE_MKLDNN_RNN", 1) && param_.mode !=
rnn_enum::kGru) {
+ // TODO(zixuanweeei): MKLDNN GRU has precision issue. A stable one
+ // will be added to MXNet when we figure out the issue.
+ int dtype = in_data[rnn_enum::kData].type_flag_;
+ MKLDNNRNNForwardInference<DType>(param_.state_outputs,
+ param_.num_layers,
+ direction,
+ param_.seq_length_,
+ param_.batch_size_,
+ param_.input_size_,
+ param_.state_size,
+ x.dptr_,
+ hx.dptr_,
+ cx_ptr,
+ w.dptr_,
+ b_ptr,
+ y.dptr_,
+ hy_ptr,
+ cy_ptr,
+ &concat_weight_memory,
+ &concat_iter_memory,
+ &x_memory,
+ &hcx_memory,
+ &wx_memory,
+ &wh_memory,
+ &bias_memory,
+ &y_memory,
+ &hcy_memory,
+ &rnn_forward_prim,
+ &has_cache,
+ dtype,
+ ctx.is_train,
+ param_.mode);
+ } else {
+ #endif
+ // Before integrating MKLDNN GRU fp32 inference
+ // using below code for keep func being OK
+ const size_t work_cpu_space_size =
+ GetRNNWorkspaceSize(param_.seq_length_, param_.batch_size_,
+ param_.state_size, direction, param_.mode);
+ if (temp_init_space_ && temp_cpu_space_size_ < work_cpu_space_size) {
+ Storage::Get()->Free(temp_cpu_space_);
+ temp_init_space_ = false;
+ }
+ if (!temp_init_space_) {
+ temp_cpu_space_ = Storage::Get()->Alloc
+ (work_cpu_space_size * sizeof(DType), Context::CPU());
+ temp_cpu_space_size_ = work_cpu_space_size;
+ temp_init_space_ = true;
+ }
+ DType* work_cpu_space = static_cast<DType*>(temp_cpu_space_.dptr);
+ RNNForwardInference<DType>(work_cpu_space,
+ param_.state_outputs,
+ param_.num_layers,
+ direction,
+ param_.seq_length_,
+ param_.batch_size_,
+ param_.input_size_,
+ param_.state_size,
+ x.dptr_,
+ hx.dptr_,
+ cx_ptr,
+ w.dptr_,
+ b_ptr,
+ y.dptr_,
+ hy_ptr,
+ cy_ptr,
+ param_.mode);
+ #if MXNET_USE_MKLDNN == 1 && !defined(__CUDACC__)
Review comment:
@zixuanweeei And this one :(
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services