Repository: systemml
Updated Branches:
  refs/heads/master e7cfcadc9 -> d56c05ece
[SYSTEMML-1677] Add a new 2D cross-entropy loss layer to the `nn` lib

Computes the forward pass for a 2D cross-entropy loss function. The
inputs consist of N examples, each of shape (C, Hin, Win), where each
pixel has C dimensions corresponding to normalized probabilities of C
classes. The loss is applied to each pixel location, and then averaged
over all pixels and all examples.

Closes #556.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d56c05ec
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d56c05ec
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d56c05ec

Branch: refs/heads/master
Commit: d56c05ecefe588050099a0219c04a21cd1359e85
Parents: e7cfcad
Author: Fei Hu <[email protected]>
Authored: Mon Jul 3 13:21:51 2017 -0700
Committer: Mike Dusenberry <[email protected]>
Committed: Mon Jul 3 13:21:51 2017 -0700

----------------------------------------------------------------------
 scripts/nn/layers/cross_entropy_loss2d.dml | 105 ++++++++++++++++++++++++
 scripts/nn/layers/softmax2d.dml            |   4 +-
 scripts/nn/test/grad_check.dml             |  48 ++++++++++-
 scripts/nn/test/run_tests.dml              |   2 +
 scripts/nn/test/test.dml                   |  58 +++++++++++++
 5 files changed, 210 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d56c05ec/scripts/nn/layers/cross_entropy_loss2d.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/cross_entropy_loss2d.dml b/scripts/nn/layers/cross_entropy_loss2d.dml
new file mode 100644
index 0000000..a76d5b0
--- /dev/null
+++ b/scripts/nn/layers/cross_entropy_loss2d.dml
@@ -0,0 +1,105 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Cross-Entropy loss function.
+ */
+source("nn/util.dml") as util
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+
+forward = function(matrix[double] pred, matrix[double] y, int C)
+    return (double loss) {
+  /*
+   * Computes the forward pass for a 2D cross-entropy loss function.
+   * The inputs consist of N examples, each of shape (C, Hin, Win),
+   * where each pixel has C dimensions corresponding to normalized
+   * probabilities of C classes.  The loss is applied to each pixel
+   * location, and then averaged over all pixels and all examples.
+   *
+   * ```
+   * L_ijk = -y_ijk^T * log(pred_ijk)
+   * L = (1/(N*H*W)) sum(L_ijk) for i=1 to N, j=1 to H, k=1 to W.
+   * ```
+   *
+   * In these equations, `L` is the total loss, `L_ijk` is the loss
+   * for pixel `(j, k)` in example `i`, `y_ijk` is the C-dimensional
+   * vector of target class probabilities, `pred_ijk` is the
+   * C-dimensional vector of predicted class probabilities, and `N` is
+   * the number of examples.
+   *
+   * For each pixel location, this can be interpreted as the negative
+   * log-likelihood assuming a Bernoulli distribution generalized to C
+   * dimensions, or a Multinomial with one observation.
+   *
+   * Inputs:
+   *  - pred: Predictions, of shape (N, C*Hin*Win).
+   *  - y: Targets, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *
+   * Outputs:
+   *  - loss: Average loss.
+   */
+  N = nrow(y)
+
+  # Transpose the matrix from (N, C*H*W) to (N*H*W, C)
+  pred_C_NHW = util::transpose_NCHW_to_CNHW(pred, C)
+  pred_NHW_C = t(pred_C_NHW)
+
+  # Transpose the matrix from (N, C*H*W) to (N*H*W, C)
+  y_C_NHW = util::transpose_NCHW_to_CNHW(y, C)
+  y_NHW_C = t(y_C_NHW)
+
+  loss = cross_entropy_loss::forward(pred_NHW_C, y_NHW_C)
+}
+
+backward = function(matrix[double] pred, matrix[double] y, int C)
+    return (matrix[double] dpred) {
+  /*
+   * Computes the backward pass for a 2D cross-entropy loss function.
+   * The inputs consist of N examples, each of shape (C, Hin, Win),
+   * where each pixel has C dimensions corresponding to normalized
+   * probabilities of C classes.
+   *
+   * Inputs:
+   *  - pred: Predictions, of shape (N, C*Hin*Win).
+   *  - y: Targets, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *
+   * Outputs:
+   *  - dpred: Gradient wrt `pred`, of shape (N, C*Hin*Win).
+   */
+  N = nrow(y)
+
+  # Transpose the matrix from (N, C*H*W) to (N*H*W, C)
+  pred_C_NHW = util::transpose_NCHW_to_CNHW(pred, C)
+  pred_NHW_C = t(pred_C_NHW)
+
+  # Transpose the matrix from (N, C*H*W) to (N*H*W, C)
+  y_C_NHW = util::transpose_NCHW_to_CNHW(y, C)
+  y_NHW_C = t(y_C_NHW)
+
+  dpred_NHW_C = cross_entropy_loss::backward(pred_NHW_C, y_NHW_C)
+
+  # Transpose the matrix from (N*H*W, C) back to (N, C*H*W)
+  dpred_C_NHW = t(dpred_NHW_C)
+  dpred = util::transpose_NCHW_to_CNHW(dpred_C_NHW, N)
+}
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/d56c05ec/scripts/nn/layers/softmax2d.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/softmax2d.dml b/scripts/nn/layers/softmax2d.dml
index aad587d..0207ac4 100644
--- a/scripts/nn/layers/softmax2d.dml
+++ b/scripts/nn/layers/softmax2d.dml
@@ -22,7 +22,6 @@
 /*
  * 2D Softmax classifier layer.
  */
-
 source("nn/util.dml") as util
 source("nn/layers/softmax.dml") as softmax
 
@@ -52,7 +51,6 @@
    * Outputs:
    *  - probs: Outputs, of shape (N, C*Hin*Win).
    */
-
   # For numerical stability, we subtract the max score of an example from all scores for that
   # example.  This is equivalent to the original formulation:
   # e^scores_ijk / sum(e^scores_ijk) == C*e^scores_ijk / C*sum(e^scores_ijk)
@@ -97,7 +95,6 @@ backward = function(matrix[double] dprobs, matrix[double] scores, int C)
    * Outputs:
    *  - dscores: Gradient wrt `scores`, of shape (N, C*Win*Hin).
   */
-
   N = nrow(scores)
 
   # Transpose the matrix from (N, C*H*W) to (N*H*W, C)
@@ -114,3 +111,4 @@ backward = function(matrix[double] dprobs, matrix[double] scores, int C)
   dscores_C_NHW = t(dscores_NHW_C)
   dscores = util::transpose_NCHW_to_CNHW(dscores_C_NHW, N)
 }
+


http://git-wip-us.apache.org/repos/asf/systemml/blob/d56c05ec/scripts/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index 6844f40..c969a98 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -31,6 +31,7 @@ source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise
 source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
 source("nn/layers/conv2d_transpose_depthwise.dml") as conv2d_transpose_depthwise
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/cross_entropy_loss2d.dml") as cross_entropy_loss2d
 source("nn/layers/dropout.dml") as dropout
 source("nn/layers/l1_loss.dml") as l1_loss
 source("nn/layers/l1_reg.dml") as l1_reg
@@ -922,12 +923,12 @@ cross_entropy_loss = function() {
   print("Grad checking the cross-entropy loss function.")
 
   # Generate data
-  N = 3 # num examples
-  K = 10 # num targets
+  N = 3  # num examples
+  K = 10  # num targets
   pred = rand(rows=N, cols=K, min=0, max=1, pdf="uniform")
-  pred = pred / rowSums(pred)  # normalized probs
+  pred = softmax::forward(pred)  # normalized probs
   y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform")
-  y = y / rowSums(y)  # normalized probs
+  y = softmax::forward(y)  # normalized probs
 
   # Compute analytical gradient
   dpred = cross_entropy_loss::backward(pred, y)
@@ -951,6 +952,45 @@ cross_entropy_loss = function() {
   }
 }
 
+cross_entropy_loss2d = function() {
+  /*
+   * Gradient check for the 2D cross-entropy loss function.
+   */
+  print("Grad checking the 2D cross-entropy loss function.")
+
+  # Generate data
+  N = 3  # num examples
+  C = 10  # num targets
+  Hin = 5  # example height
+  Win = 5  # example width
+  pred = rand(rows=N, cols=C*Hin*Win, min=0, max=1, pdf="uniform")
+  pred = softmax2d::forward(pred, C)  # normalized probs
+
+  y = rand(rows=N, cols=C*Hin*Win, min=0, max=1, pdf="uniform")
+  y = softmax2d::forward(y, C)  # normalized probs
+
+  # Compute analytical gradient
+  dpred = cross_entropy_loss2d::backward(pred, y, C)
+
+  # Grad check
+  h = 1e-6
+  for (i in 1:nrow(pred)) {
+    for (j in 1:ncol(pred)) {
+      # Compute numerical derivative
+      old = as.scalar(pred[i,j])
+      pred[i,j] = old - h
+      lossmh = cross_entropy_loss2d::forward(pred, y, C)
+      pred[i,j] = old + h
+      lossph = cross_entropy_loss2d::forward(pred, y, C)
+      pred[i,j] = old  # reset pred[i,j]
+      dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+    }
+  }
+}
+
 dropout = function() {
   /*
    * Gradient check for the (inverted) dropout layer.
http://git-wip-us.apache.org/repos/asf/systemml/blob/d56c05ec/scripts/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index 0662ffa..0f42816 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -31,6 +31,7 @@ print("---")
 
 # Loss & loss-related functions
 grad_check::cross_entropy_loss()
+grad_check::cross_entropy_loss2d()
 grad_check::l1_loss()
 grad_check::l1_reg()
 grad_check::l2_loss()
@@ -93,6 +94,7 @@ test::conv2d_depthwise()
 test::conv2d_transpose()
 test::conv2d_transpose_depthwise()
 test::cross_entropy_loss()
+test::cross_entropy_loss2d()
 test::im2col()
 test::max_pool2d()
 test::padding()


http://git-wip-us.apache.org/repos/asf/systemml/blob/d56c05ec/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
index adaef5c..e63639c 100644
--- a/scripts/nn/test/test.dml
+++ b/scripts/nn/test/test.dml
@@ -30,6 +30,7 @@ source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise
 source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
 source("nn/layers/conv2d_transpose_depthwise.dml") as conv2d_transpose_depthwise
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/cross_entropy_loss2d.dml") as cross_entropy_loss2d
 source("nn/layers/max_pool2d.dml") as max_pool2d
 source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
 source("nn/layers/tanh.dml") as tanh
@@ -411,6 +412,63 @@ cross_entropy_loss = function() {
   }
 }
 
+cross_entropy_loss2d = function() {
+  /*
+   * Test for the 2D cross-entropy loss function.
+   */
+  print("Testing the 2D cross-entropy loss function.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 3  # num targets
+  Hin = 3  # example height
+  Win = 3  # example width
+  loss_expected = 0.0770996
+
+  # pred data after applying the softmax
+  pred = matrix("9.99909163e-01 4.99988675e-01 4.53958055e-05
+                 9.99909163e-01 4.53958055e-05 4.53958055e-05
+                 9.99909163e-01 4.53958055e-05 4.53958055e-05
+                 4.53958055e-05 4.99988675e-01 4.53958055e-05
+                 4.53958055e-05 9.99909163e-01 4.53958055e-05
+                 4.53958055e-05 9.99909163e-01 4.53958055e-05
+                 4.53958055e-05 2.26994507e-05 9.99909163e-01
+                 4.53958055e-05 4.53958055e-05 9.99909163e-01
+                 4.53958055e-05 4.53958055e-05 9.99909163e-01
+                 9.99909163e-01 4.99988675e-01 4.53958055e-05
+                 9.99909163e-01 4.53958055e-05 4.53958055e-05
+                 9.99909163e-01 4.53958055e-05 4.53958055e-05
+                 4.53958055e-05 4.99988675e-01 4.53958055e-05
+                 4.53958055e-05 9.99909163e-01 4.53958055e-05
+                 4.53958055e-05 9.99909163e-01 4.53958055e-05
+                 4.53958055e-05 2.26994507e-05 9.99909163e-01
+                 4.53958055e-05 4.53958055e-05 9.99909163e-01
+                 4.53958055e-05 4.53958055e-05 9.99909163e-01", rows=N, cols=C*Hin*Win)
+  y = matrix("1 0 0
+              1 0 0
+              1 0 0
+              0 1 0
+              0 1 0
+              0 1 0
+              0 0 1
+              0 0 1
+              0 0 1
+              1 0 0
+              1 0 0
+              1 0 0
+              0 1 0
+              0 1 0
+              0 1 0
+              0 0 1
+              0 0 1
+              0 0 1", rows=N, cols=C*Hin*Win)
+
+  loss = cross_entropy_loss2d::forward(pred, y, C)
+
+  # Equivalency check
+  rel_error = test_util::check_rel_error(loss, loss_expected, 1e-3, 1e-4)
+}
+
 im2col = function() {
   /*
    * Test for the `im2col` and `col2im` functions.
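
----------------------------------------------------------------------

A quick usage sketch (editorial addition, not part of the commit): the
snippet below shows how the new layer could be wired up with the
existing `softmax2d` layer, following the same calling pattern as the
tests above. The variable names and shapes are illustrative
assumptions, not part of the committed API.

  source("nn/layers/softmax2d.dml") as softmax2d
  source("nn/layers/cross_entropy_loss2d.dml") as cross_entropy_loss2d

  # Illustrative shapes; any inputs of shape (N, C*Hin*Win) work.
  N = 4    # num examples
  C = 3    # num classes per pixel
  Hin = 8  # input height
  Win = 8  # input width

  # Hypothetical unnormalized scores and (softmax-normalized) targets.
  scores = rand(rows=N, cols=C*Hin*Win, min=-1, max=1, pdf="uniform")
  y = rand(rows=N, cols=C*Hin*Win, min=0, max=1, pdf="uniform")
  y = softmax2d::forward(y, C)  # normalize targets, as in grad_check.dml

  # Forward pass: normalized per-pixel probabilities, then average loss.
  probs = softmax2d::forward(scores, C)
  loss = cross_entropy_loss2d::forward(probs, y, C)

  # Backward pass: chain the loss gradient back through the softmax.
  dprobs = cross_entropy_loss2d::backward(probs, y, C)
  dscores = softmax2d::backward(dprobs, scores, C)

  print("2D cross-entropy loss: " + loss)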
