Repository: systemml
Updated Branches:
  refs/heads/master a26957e16 -> 2cee9bb9f


[SYSTEMML-1491] Add an ELU activation function.

This adds an "exponential linear unit" (ELU) to the `nn` deep learning
library.
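
For reference, a minimal usage sketch of the new layer (the `source` path and
the call signatures below are taken from this patch; the example shapes and
alpha = 1 are illustrative):

  source("nn/layers/elu.dml") as elu

  X = rand(rows=4, cols=8)         # example inputs
  out = elu::forward(X, 1)         # element-wise ELU with alpha = 1
  dout = rand(rows=4, cols=8)      # example upstream gradient
  dX = elu::backward(dout, X, 1)   # gradient wrt X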

Closes #721.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/91b040d6
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/91b040d6
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/91b040d6

Branch: refs/heads/master
Commit: 91b040d60cf8bf38000871747801ca7247326869
Parents: a26957e
Author: Krishna Kalyan <[email protected]>
Authored: Thu Mar 8 23:11:10 2018 -0800
Committer: Mike Dusenberry <[email protected]>
Committed: Thu Mar 8 23:11:10 2018 -0800

----------------------------------------------------------------------
 scripts/nn/layers/elu.dml      | 61 +++++++++++++++++++++++++++++++++++++
 scripts/nn/test/grad_check.dml | 42 ++++++++++++++++++++++++-
 scripts/nn/test/run_tests.dml  |  2 ++
 scripts/nn/test/test.dml       | 43 +++++++++++++++++++++++---
 4 files changed, 142 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/layers/elu.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/elu.dml b/scripts/nn/layers/elu.dml
new file mode 100644
index 0000000..51ab925
--- /dev/null
+++ b/scripts/nn/layers/elu.dml
@@ -0,0 +1,61 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Exponential Linear Unit (ELU) nonlinearity layer.
+ */
+
+forward = function(matrix[double] X, int alpha)
+    return (matrix[double] out) {
+  /*
+   * Computes the forward pass for an ELU nonlinearity layer.
+   * Reference: Clevert et al., 2015 (https://arxiv.org/abs/1511.07289v1).
+   * Performs an element-wise evaluation of
+   * `f(x) = x if x ≥ 0 else α (exp(x) − 1)`.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (any, any).
+   *  - alpha: Scaling factor for negative inputs; the ELU saturates
+   *           to -alpha as x -> -inf.  Typical value: 1.
+   *
+   * Outputs:
+   *  - out: Outputs, of same shape as `X`.
+   */
+  out = max(0, X) + min(0, alpha * (exp(X) - 1))
+}
+
+backward = function(matrix[double] dout, matrix[double] X, int alpha)
+    return (matrix[double] dX) {
+  /*
+   * Computes the backward pass for an ELU nonlinearity layer.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of same shape as `X`.
+   *  - X: Previous input data matrix, of shape (any, any).
+   *  - alpha: Scaling factor for negative inputs; the ELU saturates
+   *           to -alpha as x -> -inf.  Typical value: 1.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of same shape as `X`.
+   */
+  dX = ((X > 0) + (X < 0) * (alpha * exp(X))) * dout
+}
+
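
For context, the backward pass above is the element-wise derivative of the
forward definition (a sketch, written in the same notation as the doc comment):

  f(x)  = x                       if x > 0
        = alpha * (exp(x) - 1)    if x <= 0

  f'(x) = 1                       if x > 0
        = alpha * exp(x)          if x < 0

so `dX = dout * f'(X)`, which is what `((X > 0) + (X < 0) * (alpha * exp(X))) * dout`
computes; at x = 0 both indicators are 0, so the code assigns a zero gradient there.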

http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index 515bc1f..6150287 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -57,6 +57,7 @@ source("nn/test/conv2d_simple.dml") as conv2d_simple
 source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
 source("nn/test/util.dml") as test_util
 source("nn/util.dml") as util
+source("nn/layers/elu.dml") as elu
 
 affine = function() {
   /*
@@ -196,7 +197,7 @@ low_rank_affine = function() {
       rel_error = test_util::check_rel_grad_error(as.scalar(dU[i,j]), dU_num, lossph, lossmh)
     }
   }
-  
+
   print(" - Grad checking V.")
   for (i in 1:nrow(V)) {
     for (j in 1:ncol(V)) {
@@ -2458,3 +2459,42 @@ two_layer_affine_l2_net_backward = function(matrix[double] X, matrix[double] y,
   [dX, dW1, db1] = affine::backward(dhout, X, W1, b1)
 }
 
+elu = function() {
+  /*
+   * Gradient check for the ELU nonlinearity layer.
+   */
+  print("Grad checking ELU nonlinearity layer with L2 loss.")
+
+  # Generate data
+  N = 3  # num examples
+  M = 10  # num neurons
+  X = rand(rows=N, cols=M)
+  y = rand(rows=N, cols=M)
+
+  # Compute analytical gradients of loss wrt parameters
+  out = elu::forward(X, 1)
+  dout = l2_loss::backward(out, y)
+  dX = elu::backward(dout, X, 1)
+
+  # Grad check
+  h = 1e-6
+  print(" - Grad checking X.")
+  for (i in 1:nrow(X)) {
+    for (j in 1:ncol(X)) {
+      # Compute numerical derivative
+      old = as.scalar(X[i,j])
+      X[i,j] = old - h
+      outmh = elu::forward(X, 1)
+      lossmh = l2_loss::forward(outmh, y)
+      X[i,j] = old + h
+      outph = elu::forward(X, 1)
+      lossph = l2_loss::forward(outph, y)
+      X[i,j] = old  # reset
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+    }
+  }
+}
+
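
As in the other gradient checks in this file, the numerical derivative above is
the central difference

  dX_num ≈ (loss(X[i,j] + h) - loss(X[i,j] - h)) / (2*h),  with h = 1e-6,

whose truncation error is O(h^2); `check_rel_grad_error` then compares it
against the analytical `dX[i,j]` using a relative-error measure.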

http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index fd6e18e..36f1583 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -51,6 +51,7 @@ grad_check::conv2d_depthwise()
 grad_check::conv2d_transpose()
 grad_check::conv2d_transpose_depthwise()
 grad_check::dropout()
+grad_check::elu()
 grad_check::fm()
 grad_check::lstm()
 grad_check::max_pool2d()
@@ -99,6 +100,7 @@ test::conv2d_transpose()
 test::conv2d_transpose_depthwise()
 test::cross_entropy_loss()
 test::cross_entropy_loss2d()
+test::elu()
 test::im2col()
 test::max_pool2d()
 test::padding()

http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
index 391c7f0..e3e136f 100644
--- a/scripts/nn/test/test.dml
+++ b/scripts/nn/test/test.dml
@@ -40,6 +40,8 @@ source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
 source("nn/test/util.dml") as test_util
 source("nn/util.dml") as util
 source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/elu.dml") as elu
+
 
 batch_norm1d = function() {
   /*
@@ -838,10 +840,10 @@ compare_tanh_builtin_forward_with_old = function() {
   X = rand(rows=N, cols=C, pdf="normal")
 
   out = tanh::forward(X)
-  
+
   sigma2X = sigmoid::forward(2*X)
   out_ref = 2*sigma2X - 1
-  
+
   # Equivalency check
   for (i in 1:nrow(out)) {
     for (j in 1:ncol(out)) {
@@ -862,13 +864,13 @@ compare_tanh_builtin_backward_with_old = function() {
   C = 3  # num channels
   X = rand(rows=N, cols=C, pdf="normal")
   dout = rand(rows=N, cols=C, pdf="normal")
-  
+
   sigma2X = sigmoid::forward(2*X)
   out = 2*sigma2X - 1
   out_ref = (1-out^2) * dout
-  
+
   out = tanh::backward(dout, X)
-  
+
   # Equivalency check
   for (i in 1:nrow(out)) {
     for (j in 1:ncol(out)) {
@@ -1093,3 +1095,34 @@ softmax2d = function() {
   }
 }
 
+elu = function() {
+  /*
+   * Test for the ELU nonlinearity layer.
+   */
+  print("Testing ELU function.")
+
+  X = matrix("0.3923 -0.2236 -0.3195 -1.2050  1.0445 -0.6332  0.5731  0.5409 
-0.3919 -1.0427", rows = 10, cols = 1)
+
+  print(" - Testing forward")
+  out = elu::forward(X, 1)
+  out_ref = matrix("0.3923 -0.2003 -0.2735 -0.7003  1.0445 -0.4691  0.5731  
0.5409 -0.3242 -0.6475", rows = 10, cols = 1)
+
+  for (i in 1:nrow(out)) {
+    for(j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]),
+          as.scalar(out_ref[i,j]), 1e-3, 1e-3)
+    }
+  }
+
+  print(" - Testing backward")
+  out = elu::backward(X, X, 1)
+  out_ref = matrix("0.3923 -0.1788 -0.2321 -0.3611  1.0445 -0.3362  0.5731  
0.5409 -0.2648 -0.3676", rows = 10, cols = 1)
+
+  for (i in 1:nrow(out)) {
+    for(j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]),
+          as.scalar(out_ref[i,j]), 1e-3, 1e-3)
+    }
+  }
+}
+
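
As a quick sanity check on the hard-coded reference values (alpha = 1): a
positive input such as 0.3923 passes through both passes unchanged; for
x = -0.3195 the forward pass gives exp(-0.3195) - 1 ≈ -0.2735, and the backward
pass (which here reuses X as dout) gives -0.3195 * exp(-0.3195) ≈ -0.2321, both
matching `out_ref` within the 1e-3 tolerance used above.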
