Repository: systemml

Updated Branches:
  refs/heads/master a26957e16 -> 2cee9bb9f
[SYSTEMML-1491] Add an ELU activation function.

This adds an "exponential linear unit" (ELU) layer to the `nn` deep learning library.

Closes #721.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/91b040d6
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/91b040d6
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/91b040d6

Branch: refs/heads/master
Commit: 91b040d60cf8bf38000871747801ca7247326869
Parents: a26957e
Author: Krishna Kalyan <[email protected]>
Authored: Thu Mar 8 23:11:10 2018 -0800
Committer: Mike Dusenberry <[email protected]>
Committed: Thu Mar 8 23:11:10 2018 -0800

----------------------------------------------------------------------
 scripts/nn/layers/elu.dml      | 61 +++++++++++++++++++++++++++++++++++++
 scripts/nn/test/grad_check.dml | 42 ++++++++++++++++++++++++-
 scripts/nn/test/run_tests.dml  |  2 ++
 scripts/nn/test/test.dml       | 43 +++++++++++++++++++++++---
 4 files changed, 142 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/layers/elu.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/elu.dml b/scripts/nn/layers/elu.dml
new file mode 100644
index 0000000..51ab925
--- /dev/null
+++ b/scripts/nn/layers/elu.dml
@@ -0,0 +1,61 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Exponential Linear Unit (ELU) nonlinearity layer.
+ */
+
+forward = function(matrix[double] X, int alpha)
+    return (matrix[double] out) {
+  /*
+   * Computes the forward pass for an ELU nonlinearity layer.
+   *
+   * Reference: https://arxiv.org/abs/1511.07289v1 (Clevert et al., 2015).
+   *
+   * Performs an element-wise evaluation of
+   * `f(x) = x if x >= 0, else alpha * (exp(x) - 1)`.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (any, any).
+   *  - alpha: Scaling factor for negative inputs; the output
+   *      saturates at -alpha for large negative inputs.
+   *      Typical value is 1.
+   *
+   * Outputs:
+   *  - out: Outputs, of same shape as `X`.
+   */
+  out = max(0, X) + min(0, alpha * (exp(X) - 1))
+}
+
+backward = function(matrix[double] dout, matrix[double] X, int alpha)
+    return (matrix[double] dX) {
+  /*
+   * Computes the backward pass for an ELU nonlinearity layer.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of same shape as `X`.
+   *  - X: Previous input data matrix, of shape (any, any).
+   *  - alpha: Scaling factor for negative inputs, as used in the
+   *      forward pass.  Typical value is 1.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of same shape as `X`.
+   */
+  dX = ((X > 0) + (X < 0) * (alpha * exp(X))) * dout
+}
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index 515bc1f..6150287 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -57,6 +57,7 @@ source("nn/test/conv2d_simple.dml") as conv2d_simple
 source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
 source("nn/test/util.dml") as test_util
 source("nn/util.dml") as util
+source("nn/layers/elu.dml") as elu
 
 affine = function() {
   /*
@@ -196,7 +197,7 @@ low_rank_affine = function() {
       rel_error = test_util::check_rel_grad_error(as.scalar(dU[i,j]), dU_num, lossph, lossmh)
     }
   }
-  
+
   print(" - Grad checking V.")
   for (i in 1:nrow(V)) {
     for (j in 1:ncol(V)) {
@@ -2458,3 +2459,42 @@ two_layer_affine_l2_net_backward = function(matrix[double] X, matrix[double] y,
   [dX, dW1, db1] = affine::backward(dhout, X, W1, b1)
 }
 
+elu = function() {
+  /*
+   * Gradient check for ELU nonlinearity layer.
+   */
+  print("Grad checking ELU nonlinearity layer with L2 loss.")
+
+  # Generate data
+  N = 3  # num examples
+  M = 10  # num neurons
+
+  X = rand(rows=N, cols=M)
+  y = rand(rows=N, cols=M)
+
+  out = elu::forward(X, 1)
+  dout = l2_loss::backward(out, y)
+  dX = elu::backward(dout, X, 1)
+
+  # Grad check
+  h = 1e-6
+  print(" - Grad checking X.")
+  for (i in 1:nrow(X)) {
+    for (j in 1:ncol(X)) {
+      # Compute numerical derivative
+      old = as.scalar(X[i,j])
+      X[i,j] = old - h
+      outmh = elu::forward(X, 1)
+      lossmh = l2_loss::forward(outmh, y)
+      X[i,j] = old + h
+      outph = elu::forward(X, 1)
+      lossph = l2_loss::forward(outph, y)
+      X[i,j] = old  # reset
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+    }
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index fd6e18e..36f1583 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -51,6 +51,7 @@ grad_check::conv2d_depthwise()
 grad_check::conv2d_transpose()
 grad_check::conv2d_transpose_depthwise()
 grad_check::dropout()
+grad_check::elu()
 grad_check::fm()
 grad_check::lstm()
 grad_check::max_pool2d()
@@ -99,6 +100,7 @@ test::conv2d_transpose()
 test::conv2d_transpose_depthwise()
 test::cross_entropy_loss()
 test::cross_entropy_loss2d()
+test::elu()
 test::im2col()
 test::max_pool2d()
 test::padding()

http://git-wip-us.apache.org/repos/asf/systemml/blob/91b040d6/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
index 391c7f0..e3e136f 100644
--- a/scripts/nn/test/test.dml
+++ b/scripts/nn/test/test.dml
@@ -40,6 +40,8 @@ source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
 source("nn/test/util.dml") as test_util
 source("nn/util.dml") as util
 source("nn/layers/sigmoid.dml") as sigmoid
+source("nn/layers/elu.dml") as elu
+
 
 batch_norm1d = function() {
   /*
@@ -838,10 +840,10 @@ compare_tanh_builtin_forward_with_old = function() {
   X = rand(rows=N, cols=C, pdf="normal")
   out = tanh::forward(X)
-  
+
   sigma2X = sigmoid::forward(2*X)
   out_ref = 2*sigma2X - 1
-  
+
   # Equivalency check
   for (i in 1:nrow(out)) {
     for (j in 1:ncol(out)) {
@@ -862,13 +864,13 @@ compare_tanh_builtin_backward_with_old = function() {
   C = 3  # num channels
   X = rand(rows=N, cols=C, pdf="normal")
   dout = rand(rows=N, cols=C, pdf="normal")
-  
+
   sigma2X = sigmoid::forward(2*X)
   out = 2*sigma2X - 1
   out_ref = (1-out^2) * dout
-  
+
   out = tanh::backward(dout, X)
-  
+
   # Equivalency check
   for (i in 1:nrow(out)) {
     for (j in 1:ncol(out)) {
@@ -1093,3 +1095,34 @@ softmax2d = function() {
     }
   }
 
+elu = function() {
+  /*
+   * Test for ELU function.
+   */
+  print("Testing ELU function.")
+
+  X = matrix("0.3923 -0.2236 -0.3195 -1.2050 1.0445 -0.6332 0.5731 0.5409 -0.3919 -1.0427", rows=10, cols=1)
+
+  print(" - Testing forward")
+  out = elu::forward(X, 1)
+  out_ref = matrix("0.3923 -0.2003 -0.2735 -0.7003 1.0445 -0.4691 0.5731 0.5409 -0.3242 -0.6475", rows=10, cols=1)
+
+  for (i in 1:nrow(out)) {
+    for (j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]),
+                                             as.scalar(out_ref[i,j]), 1e-3, 1e-3)
+    }
+  }
+
+  print(" - Testing backward")
+  out = elu::backward(X, X, 1)
+  out_ref = matrix("0.3923 -0.1788 -0.2321 -0.3611 1.0445 -0.3362 0.5731 0.5409 -0.2648 -0.3676", rows=10, cols=1)
+
+  for (i in 1:nrow(out)) {
+    for (j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]),
+                                             as.scalar(out_ref[i,j]), 1e-3, 1e-3)
+    }
+  }
+}
+
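
For anyone trying out the new layer, below is a minimal usage sketch (not part of the
commit above) showing how `elu::forward` and `elu::backward` chain with an L2 loss,
following the same pattern as the new grad check. The data shapes, the alpha value of 1,
and the use of the existing `nn/layers/l2_loss.dml` layer are illustrative assumptions
for this sketch, not something introduced by this patch.

  # Illustrative sketch only -- mirrors the grad check in scripts/nn/test/grad_check.dml.
  source("nn/layers/elu.dml") as elu
  source("nn/layers/l2_loss.dml") as l2_loss  # assumed existing loss layer

  # Toy data: 4 examples with 8 features each (shapes chosen arbitrarily).
  X = rand(rows=4, cols=8, pdf="normal")
  y = rand(rows=4, cols=8)

  # Forward pass: f(x) = x if x >= 0, else alpha * (exp(x) - 1), here with alpha = 1.
  out = elu::forward(X, 1)
  loss = l2_loss::forward(out, y)

  # Backward pass: chain the loss gradient through the ELU, whose derivative
  # is 1 for x > 0 and alpha * exp(x) for x < 0.
  dout = l2_loss::backward(out, y)
  dX = elu::backward(dout, X, 1)

  print("L2 loss after the ELU layer: " + loss)

With alpha = 1 this is the same configuration exercised by grad_check::elu() and
test::elu() in the diff above.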
