Adnios commented on a change in pull request #20339:
URL: https://github.com/apache/incubator-mxnet/pull/20339#discussion_r650317486
##########
File path: src/operator/mshadow_op.h
##########
@@ -415,11 +415,31 @@ MXNET_UNARY_MATH_OP(log_sigmoid, math::log(1.0f / (1.0f + math::exp(-a))));
MXNET_UNARY_MATH_OP(log_sigmoid_grad, 1.0f / (1.0f + math::exp(a)));
-MXNET_UNARY_MATH_OP(mish, a * math::tanh(math::log(1.0f + math::exp(a))));
+struct mish : public mxnet_op::tunable {
+ template<typename DType>
+ MSHADOW_XINLINE static DType Map(DType a) {
+ // reference softrelu
+ auto softrelu = math::log1p(math::exp(a));
Review comment:
Hi, @ptrendx. There is no `softrelu` in `src/operator/math_functions-inl.h` (namespace `math`).
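
For reference, a minimal standalone C++ sketch of a numerically stable CPU-side mish, assuming the same `> 20` cutoff used in the GPU path below; the `stable_softrelu` helper name and the threshold are illustrative, not part of mshadow:
```cpp
#include <cmath>
#include <cstdio>

// Hypothetical helper mirroring log1p(exp(a)) from the diff above,
// with an assumed cutoff so exp(a) cannot overflow for large inputs.
template <typename DType>
inline DType stable_softrelu(DType a) {
  return (a > DType(20)) ? a : DType(std::log1p(std::exp(static_cast<double>(a))));
}

// mish(x) = x * tanh(softplus(x))
template <typename DType>
inline DType mish(DType a) {
  return a * DType(std::tanh(static_cast<double>(stable_softrelu(a))));
}

int main() {
  std::printf("mish(1.0)  = %f\n", mish(1.0));   // ~0.865098
  std::printf("mish(50.0) = %f\n", mish(50.0));  // ~50.0, cutoff avoids exp() overflow
  return 0;
}
```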
##########
File path: src/common/cuda/rtc/forward_functions-inl.h
##########
@@ -697,9 +697,11 @@ __device__ inline DType log_sigmoid(const DType val) {
template <typename DType>
__device__ inline DType mish(const DType val) {
if (type_util::has_double_or_integral<DType>::value) {
- return val * ::tanh(::log(1 + ::exp(val)));
+ const auto softrelu = (val > 20) ? val : ::log(1 + ::exp(val));
+ return val * ::tanh(softrelu);
} else {
- return val * ::tanhf(logf(1 + expf(val)));
+ const auto softrelu = (val > 20) ? val : logf(1 + expf(val));
+ return val * ::tanhf(softrelu);
Review comment:
Done
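
The `(val > 20) ? val : ...` guard works because, past that point, the softplus correction term is below single-precision resolution. A quick plain-C++ check of that claim (the sample values are illustrative, not from the PR):
```cpp
#include <cmath>
#include <cstdio>

int main() {
  // softplus(val) = val + log1p(exp(-val)); for val > 20 the correction term
  // is < 2.1e-9, well below float epsilon relative to val, so returning val
  // directly is exact in float while never evaluating exp(val).
  for (double val : {20.0, 30.0, 88.0}) {
    const double exact = val + std::log1p(std::exp(-val));
    std::printf("val=%5.1f  softplus=%.12f  softplus-val=%.3e\n",
                val, exact, exact - val);
  }
  return 0;
}
```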
##########
File path: src/common/cuda/rtc/backward_functions-inl.h
##########
@@ -53,10 +53,9 @@ backward_log_sigmoid(const DTypeGrad grad, const DType val) {
template <typename DType, typename DTypeGrad>
__device__ inline mixed_type<DTypeGrad, DType>
backward_mish(const DTypeGrad grad, const DType val) {
- const mixed_type<DTypeGrad, DType> v = val;
- const auto softrelu = op::log(1 + exp(v));
- const auto tanh = op::tanh(softrelu);
- return grad * (tanh + v * sigmoid(v) * (1 - tanh * tanh));
+ const auto softrelu = (val > 20) ? val : op::log(1 + op::exp(val));
Review comment:
Done
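
To sanity-check the backward expression this hunk rewrites, `grad * (tanh + val * sigmoid(val) * (1 - tanh * tanh))` with the guarded softrelu, here is a small self-contained C++ sketch comparing it against a central finite difference; the helper names are illustrative:
```cpp
#include <cmath>
#include <cstdio>

double softrelu(double x) { return (x > 20.0) ? x : std::log1p(std::exp(x)); }
double sigmoid(double x)  { return 1.0 / (1.0 + std::exp(-x)); }
double mish(double x)     { return x * std::tanh(softrelu(x)); }

// d/dx mish(x) = tanh(sp) + x * sigmoid(x) * (1 - tanh(sp)^2), where sp = softrelu(x)
double mish_grad(double x) {
  const double t = std::tanh(softrelu(x));
  return t + x * sigmoid(x) * (1.0 - t * t);
}

int main() {
  const double x = 0.7, h = 1e-6;
  const double numeric = (mish(x + h) - mish(x - h)) / (2.0 * h);
  std::printf("analytic=%.8f  numeric=%.8f\n", mish_grad(x), numeric);
  return 0;
}
```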
##########
File path: src/common/cuda/rtc/forward_functions-inl.h
##########
@@ -697,9 +697,11 @@ __device__ inline DType log_sigmoid(const DType val) {
template <typename DType>
__device__ inline DType mish(const DType val) {
if (type_util::has_double_or_integral<DType>::value) {
- return val * ::tanh(::log(1 + ::exp(val)));
+ const auto softrelu = (val > 20) ? val : ::log(1 + ::exp(val));
+ return val * ::tanh(softrelu);
} else {
- return val * ::tanhf(logf(1 + expf(val)));
+ const auto softrelu = (val > 20) ? val : logf(1 + expf(val));
+ return val * ::tanhf(softrelu);
Review comment:
Hi, @ptrendx. There are errors when using `op::softrelu(val)` or `softrelu(val)`.
```
[2021-06-12T03:43:50.408Z] E unary_kernel_kernel.cu(1724): error: namespace "op" has no member "tanh"
[2021-06-12T03:43:50.408Z] E
[2021-06-12T03:43:50.408Z] E unary_kernel_kernel.cu(1724): error: namespace "op" has no member "softrelu"
```
```
[2021-06-12T05:40:39.340Z] E FusedKernel_mish_elemwise_mul__kernel.cu(1724): error: identifier "softrelu" is undefined
[2021-06-12T05:40:39.340Z] E           detected during instantiation of "DType op::mish(DType) [with DType=DType_input_0]"
[2021-06-12T05:40:39.340Z] E (3123): here
```