anirudhacharya closed pull request #12748: [MXNET-1028] Adding CELU Activation function
URL: https://github.com/apache/incubator-mxnet/pull/12748
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:
diff --git a/docs/api/python/gluon/nn.md b/docs/api/python/gluon/nn.md
index 25c82f06668..3e2f786c0ef 100644
--- a/docs/api/python/gluon/nn.md
+++ b/docs/api/python/gluon/nn.md
@@ -79,6 +79,7 @@ This document lists the neural network blocks in Gluon:
PReLU
ELU
SELU
+ CELU
Swish
```
diff --git a/python/mxnet/gluon/nn/activations.py b/python/mxnet/gluon/nn/activations.py
index fa8eee9d298..4d0369317d2 100644
--- a/python/mxnet/gluon/nn/activations.py
+++ b/python/mxnet/gluon/nn/activations.py
@@ -18,7 +18,7 @@
# coding: utf-8
# pylint: disable= arguments-differ
"""Basic neural network layers."""
-__all__ = ['Activation', 'LeakyReLU', 'PReLU', 'ELU', 'SELU', 'Swish']
+__all__ = ['Activation', 'LeakyReLU', 'PReLU', 'ELU', 'SELU', 'CELU', 'Swish']
from ... import initializer
from ..block import HybridBlock
@@ -181,6 +181,39 @@ def hybrid_forward(self, F, x):
return F.LeakyReLU(x, act_type='selu', name='fwd')
+class CELU(HybridBlock):
+ r"""
+ Continuous Exponential Linear Unit. (CELU)
+ https://arxiv.org/abs/1704.07483
+
+ .. math::
+
+ f\left(x\right) = \left\{
+ \begin{array}{lr}
+ \alpha (\exp(x/\alpha) - 1) & : x \lt 0 \\
+ x & : x \geq 0 \\
+ \end{array}
+ \right.\\
+
+ Parameters
+ ----------
+ alpha : float
+ slope coefficient for the negative half axis.
+
+ Inputs:
+ - **data**: input tensor with arbitrary shape.
+
+ Outputs:
+ - **out**: output tensor with the same shape as `data`.
+ """
+ def __init__(self, alpha=0.25, **kwargs):
+ super(CELU, self).__init__(**kwargs)
+ self._alpha = alpha
+
+ def hybrid_forward(self, F, x):
+ return F.LeakyReLU(x, slope=self._alpha, act_type='celu', name='fwd')
+
+
class Swish(HybridBlock):
r"""
Swish Activation function
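For reference, the new Gluon block added above should behave roughly as in the sketch below; this is an illustrative check against the docstring's formula, not part of the diff (alpha=0.25 is the constructor default shown above):

    import numpy as np
    import mxnet as mx

    celu = mx.gluon.nn.CELU(alpha=0.25)      # parameter-free block, no initialize() needed
    x = mx.nd.array([-2.0, -0.5, 0.0, 1.5])
    y = celu(x).asnumpy()

    # docstring formula: f(x) = x for x >= 0, alpha * (exp(x/alpha) - 1) otherwise
    xa = x.asnumpy()
    ref = np.where(xa >= 0, xa, 0.25 * np.expm1(xa / 0.25))
    assert np.allclose(y, ref, atol=1e-6)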
diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index fe2668959af..df0c7b694d4 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -47,7 +47,7 @@ namespace op {
namespace leakyrelu {
enum LeakyReLUOpInputs {kData, kGamma};
enum LeakyReLUOpOutputs {kOut, kMask};
-enum LeakyReLUOpType {kLeakyReLU, kPReLU, kRReLU, kELU, kSELU};
+enum LeakyReLUOpType {kLeakyReLU, kPReLU, kRReLU, kELU, kSELU, kCELU};
enum LeakyReLUOpResource {kRandom};
} // namespace leakyrelu
@@ -64,9 +64,10 @@ struct LeakyReLUParam : public dmlc::Parameter<LeakyReLUParam> {
.add_enum("prelu", leakyrelu::kPReLU)
.add_enum("elu", leakyrelu::kELU)
.add_enum("selu", leakyrelu::kSELU)
+ .add_enum("celu", leakyrelu::kCELU)
.describe("Activation function to be applied.");
DMLC_DECLARE_FIELD(slope).set_default(0.25f)
- .describe("Init slope for the activation. (For leaky and elu only)");
+ .describe("Init slope for the activation. (For leaky, elu and celu only)");
DMLC_DECLARE_FIELD(lower_bound).set_default(0.125f)
.describe("Lower bound of random slope. (For rrelu only)");
DMLC_DECLARE_FIELD(upper_bound).set_default(0.334f)
@@ -190,8 +191,16 @@ class LeakyReLUOp : public Operator {
});
break;
}
+ case leakyrelu::kCELU: {
+ MXNET_ASSIGN_REQ_SWITCH(req[leakyrelu::kOut], Req, {
+ mxnet_op::Kernel<mxnet_op::op_with_req<mshadow_op::celu, Req>, xpu>::Launch(
+ s, out.size(0) * out.size(1) * out.size(2), out.dptr_, data.dptr_, DType(param_.slope));
+ });
+ break;
+ }
default:
- LOG(FATAL) << "Not implmented";
+ LOG(FATAL) << "Not implemented";
}
}
@@ -287,8 +296,17 @@ class LeakyReLUOp : public Operator {
});
break;
}
+ case leakyrelu::kCELU: {
+ MXNET_ASSIGN_REQ_SWITCH(req[leakyrelu::kData], Req, {
+ mxnet_op::Kernel<mxnet_op::op_with_req<
+ mxnet_op::backward_grad_tuned<mshadow_op::celu_grad>, Req>, xpu>::Launch(
+ s, gdata.size(0) * gdata.size(1) * gdata.size(2), gdata.dptr_, grad.dptr_,
+ output.dptr_, DType(param_.slope));
+ });
+ break;
+ }
default:
- LOG(FATAL) << "Not implmented";
+ LOG(FATAL) << "Not implemented";
}
}
diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc
index 45f9511c908..a07500ac548 100644
--- a/src/operator/leaky_relu.cc
+++ b/src/operator/leaky_relu.cc
@@ -61,6 +61,8 @@ The following modified ReLU Activation functions are supported:
- *rrelu*: Randomized ReLU. same as *leaky* but the `slope` is uniformly and randomly chosen from
*[lower_bound, upper_bound)* for training, while fixed to be
*(lower_bound+upper_bound)/2* for inference.
+- *celu*: Continuous exponential linear unit. `y = x >= 0 ? x : slope * (exp(x/slope)-1)` as
+ described in https://arxiv.org/abs/1704.07483
)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input data to activation function.")
diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 06a223dda39..60eb59f22d0 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -148,6 +148,11 @@ MXNET_BINARY_MATH_OP_NC(elu, a > DType(0) ? a :
MXNET_BINARY_MATH_OP_NC(elu_grad, a > DType(0) ? DType(1) : DType(b + a));
+MXNET_BINARY_MATH_OP_NC(celu, a >= DType(0) ? a :
+ DType(math::id(b) * math::expm1(a/b)));
+
+MXNET_BINARY_MATH_OP_NC(celu_grad, a >= DType(0) ? DType(1) : DType((b + a) / b));
+
MXNET_SIMPLE_UNARY_MATH_OP(tanh);
MXNET_UNARY_MATH_OP(tanh_grad, 1.0f - math::sqr(a));
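Note that celu_grad is written in terms of the forward output rather than the input: for x < 0 the forward value is y = b * (exp(x/b) - 1) with b the slope, so dy/dx = exp(x/b) = (y + b) / b, which is the (b + a) / b above with a bound to the output; for x >= 0 the derivative is 1. A quick finite-difference check of that identity (illustrative only, not part of the diff):

    import numpy as np

    slope, x, eps = 0.25, -0.8, 1e-6
    y = slope * np.expm1(x / slope)
    numeric = (slope * np.expm1((x + eps) / slope) - y) / eps
    analytic = (y + slope) / slope    # equals exp(x / slope)
    assert abs(numeric - analytic) < 1e-4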
diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc
index cf5412f9824..975db225138 100644
--- a/src/operator/operator_tune.cc
+++ b/src/operator/operator_tune.cc
@@ -320,12 +320,14 @@ IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::power); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rpower); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::xelu); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::elu); // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::celu); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::power_grad); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rpower_grad); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::power_rgrad); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::xelu_grad); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::prelu_grad); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::elu_grad); // NOLINT()
+IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::celu_grad); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::maximum); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minimum); // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::hypot); // NOLINT()
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 3049674821c..cd1fd230a46 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1036,6 +1036,16 @@ def elu(x):
for test_point, ref_point in zip(elu_test(point_to_validate), elu(point_to_validate)):
assert test_point == ref_point
+ celu = mx.gluon.nn.CELU()
+ def celu_test(x):
+ def celu(x):
+ alpha = 0.25
+ return alpha * (mx.nd.exp(x / alpha) - 1) if x < 0 else x
+ return [celu(x_i) for x_i in x]
+
+ for test_point, ref_point in zip(celu_test(point_to_validate), celu(point_to_validate)):
+ assert test_point == ref_point
+
selu = mx.gluon.nn.SELU()
def selu_test(x):
def selu(x):
@@ -1043,7 +1053,7 @@ def selu(x):
return scale * x if x >= 0 else alpha * mx.nd.exp(x) - alpha
return [selu(x_i) for x_i in x]
- for test_point, ref_point in zip(selu(point_to_validate), selu(point_to_validate)):
+ for test_point, ref_point in zip(selu_test(point_to_validate), selu(point_to_validate)):
assert test_point == ref_point
prelu = mx.gluon.nn.PReLU()
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 80a83df45da..64c8fd1ce90 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -735,6 +735,8 @@ def fleaky_relu(x, act_type, slope=0.25):
out[neg_indices] = slope * np.expm1(out[neg_indices])
elif act_type == 'leaky':
out[neg_indices] = slope * out[neg_indices]
+ elif act_type == 'celu':
+ out[neg_indices] = slope * np.expm1(out[neg_indices] / slope)
return out
def fleaky_relu_grad(grad, x, y, act_type, slope=0.25):
neg_indices = x < 0
@@ -743,6 +745,8 @@ def fleaky_relu_grad(grad, x, y, act_type, slope=0.25):
out[neg_indices] = y[neg_indices] + slope
elif act_type == 'leaky':
out[neg_indices] = slope
+ elif act_type == 'celu':
+ out[neg_indices] = (y[neg_indices] + slope) / slope
return out * grad
for ndim in range(1, 4):
shape = rand_shape_nd(ndim)
@@ -754,7 +758,7 @@ def fleaky_relu_grad(grad, x, y, act_type, slope=0.25):
rtol = 1e-2
atol = 1e-3
xa[abs(xa) < eps] = 1.0
- for act_type in ['elu', 'leaky']:
+ for act_type in ['elu', 'leaky', 'celu']:
y = mx.symbol.LeakyReLU(data=x, slope=slp, act_type=act_type)
ya = fleaky_relu(xa, slope=slp, act_type=act_type)
ga = fleaky_relu_grad(np.ones(shape), xa, ya, slope=slp, act_type=act_type)
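Beyond the numeric operator tests above, the backward path could also be exercised end to end through the Gluon block with autograd; a sketch along these lines (not part of the diff, assumes the CELU block added in this PR):

    import numpy as np
    import mxnet as mx
    from mxnet import autograd

    alpha = 0.5
    celu = mx.gluon.nn.CELU(alpha=alpha)
    x = mx.nd.array([-1.0, 0.5])
    x.attach_grad()
    with autograd.record():
        y = celu(x)
    y.backward()

    # expected gradient: exp(x/alpha) for x < 0, 1 for x >= 0
    xa = x.asnumpy()
    expected = np.where(xa >= 0, 1.0, np.exp(xa / alpha))
    assert np.allclose(x.grad.asnumpy(), expected, atol=1e-5)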
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services