zhreshold commented on a change in pull request #14099: Performance improvement
in ToTensor GPU Kernel
URL: https://github.com/apache/incubator-mxnet/pull/14099#discussion_r255370675
##########
File path: src/operator/image/image_random-inl.h
##########
@@ -123,24 +135,50 @@ void ToTensorOpForward(const nnvm::NodeAttrs &attrs,
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
+ // We do not use a temp buffer when performing the operation.
+ // Hence, this check is necessary.
CHECK_EQ(req[0], kWriteTo)
<< "`to_tensor` does not support inplace updates";
- // 3D Input - (h, w, c)
- if (inputs[0].ndim() == 3) {
+ const float normalize_factor = 255.0f;
+
+ if (std::is_same<xpu, gpu>::value) {
+ #if MXNET_USE_CUDA
+ mshadow::Stream<gpu> *s = ctx.get_stream<gpu>();
+ MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+ MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+ if (inputs[0].ndim() == 3) {
+ Tensor<gpu, 3, DType> input = inputs[0].get<gpu, 3, DType>(s);
+ Tensor<gpu, 3, float> output = outputs[0].get<gpu, 3, float>(s);
+ ToTensorImplCUDA<DType, Tensor<gpu, 3, DType>, Tensor<gpu, 3,
float>>
+ (s, input, output, req_type, normalize_factor);
+ } else {
+ Tensor<gpu, 4, DType> input = inputs[0].get<gpu, 4, DType>(s);
+ Tensor<gpu, 4, float> output = outputs[0].get<gpu, 4, float>(s);
+ ToTensorImplCUDA<DType, Tensor<gpu, 4, DType>, Tensor<gpu, 4,
float>>
+ (s, input, output, req_type, normalize_factor);
+ }
+ });
+ });
+ #endif // MXNET_USE_CUDA
Review comment:
Please add an `#else` branch here that raises an error, e.g. "build with USE_CUDA=1 is required to run this operator on GPU".
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services