Repository: incubator-systemml
Updated Branches:
  refs/heads/master 42ebc9620 -> 7c33b7ef8
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3dc8f21c/projects/breast_cancer/softmax_clf.dml
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/softmax_clf.dml b/projects/breast_cancer/softmax_clf.dml
new file mode 100644
index 0000000..e106a36
--- /dev/null
+++ b/projects/breast_cancer/softmax_clf.dml
@@ -0,0 +1,207 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Breast Cancer Softmax Model
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/softmax.dml") as softmax
+#source("nn/optim/adam.dml") as adam
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] Y,
+                 matrix[double] X_val, matrix[double] Y_val,
+                 double lr, double mu, double decay,
+                 int batch_size, int epochs, int log_interval)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Trains a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   * The targets, Y, have K classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - Y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, D).
+   *  - Y_val: Target validation matrix, of shape (N, K).
+   *  - lr: Learning rate.
+   *  - mu: Momentum value.
+   *      Typical values are in the range of [0.5, 0.99], usually
+   *      started at the lower end and annealed towards the higher end.
+   *  - decay: Learning rate decay rate.
+   *  - batch_size: Size of mini-batches to train on.
+   *  - epochs: Total number of full training loops over the full data set.
+   *  - log_interval: Interval, in iterations, between log outputs.
+   *
+   * Outputs:
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   */
+  N = nrow(Y)  # num examples
+  D = ncol(X)  # num features
+  K = ncol(Y)  # num classes
+
+  # Create softmax classifier:
+  # affine -> softmax
+  [W, b] = affine::init(D, K)
+  W = W / sqrt(2.0/(D)) * sqrt(1/(D))  # rescale W from the He init scale, sqrt(2/D), to 1/sqrt(D)
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  vW = sgd_nesterov::init(W)  # optimizer momentum state for W
+  vb = sgd_nesterov::init(b)  # optimizer momentum state for b
+  #[mW, vW] = adam::init(W)  # optimizer 1st & 2nd moment state for W
+  #[mb, vb] = adam::init(b)  # optimizer 1st & 2nd moment state for b
+
+  # Starting validation loss & accuracy
+  probs_val = predict(X_val, W, b)
+  loss_val = cross_entropy_loss::forward(probs_val, Y_val)
+  accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(Y_val))
+  # Output results
+  print("Start: Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
+
+  # Optimize
+  print("Starting optimization")
+  iters = ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for (i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
+      #print("Epoch: " + e + ", Iter: " + i + ", X[" + beg + ":" + end + ",]")
+      X_batch = X[beg:end,]
+      Y_batch = Y[beg:end,]
+
+      # Compute forward pass
+      ## affine & softmax:
+      out = affine::forward(X_batch, W, b)
+      probs = softmax::forward(out)
+
+      # Compute backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, Y_batch)
+      ## affine & softmax:
+      dout = softmax::backward(dprobs, out)
+      [dX_batch, dW, db] = affine::backward(dout, X_batch, W, b)
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W, vW] = sgd_nesterov::update(W, dW, lr, mu, vW)
+      [b, vb] = sgd_nesterov::update(b, db, lr, mu, vb)
+      #[W, mW, vW] = adam::update(W, dW, lr, 0.9, 0.999, 1e-8, e*i-1, mW, vW)
+      #[b, mb, vb] = adam::update(b, db, lr, 0.9, 0.999, 1e-8, e*i-1, mb, vb)
+
+      # Compute loss & accuracy for training & validation data every `log_interval` iterations.
+      if (i %% log_interval == 0) {
+        #print("Eval time! - i: " + i)
+        # Compute training loss & accuracy
+        loss = cross_entropy_loss::forward(probs, Y_batch)
+        accuracy = mean(rowIndexMax(probs) == rowIndexMax(Y_batch))
+
+        # Compute validation loss & accuracy
+        probs_val = predict(X_val, W, b)
+        loss_val = cross_entropy_loss::forward(probs_val, Y_val)
+        accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(Y_val))
+
+        # Output results
+        print("Epoch: " + e + "/" + epochs + ", Iter: " + i + "/" + iters
+              + ", Train Loss: " + loss + ", Train Accuracy: " + accuracy + ", Val Loss: "
+              + loss_val + ", Val Accuracy: " + accuracy_val + ", lr: " + lr + ", mu: " + mu)
+      }
+    }
+    # Anneal momentum towards 0.999
+    mu = mu + (0.999 - mu)/(1+epochs-e)
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
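
As a usage sketch (not part of the patch), `train` might be invoked as follows; the input names and all hyperparameter values here are illustrative assumptions:

  # Illustrative only: hypothetical inputs and hyperparameters
  [W, b] = train(X=X_train, Y=Y_train, X_val=X_val, Y_val=Y_val,
                 lr=0.05, mu=0.5, decay=0.95,
                 batch_size=50, epochs=10, log_interval=10)
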
+
+predict = function(matrix[double] X, matrix[double] W, matrix[double] b)
+    return (matrix[double] probs) {
+  /*
+   * Computes the class probability predictions of a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   */
+  N = nrow(X)  # num examples
+  K = ncol(W)  # num classes
+
+  # Compute forward pass
+  ## affine & softmax:
+  out = affine::forward(X, W, b)
+  probs = softmax::forward(out)
+}
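
Note that `predict` returns per-class probabilities rather than hard labels; hard class indices can be recovered with `rowIndexMax`, as `eval` below does internally. A minimal sketch (the input name `X_new` is hypothetical):

  probs = predict(X_new, W, b)       # class probabilities, shape (N, K)
  pred_classes = rowIndexMax(probs)  # predicted class index per example, shape (N, 1)
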
+
+eval = function(matrix[double] probs, matrix[double] Y)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a softmax classifier.
+   *
+   * The probs matrix contains the class probability predictions
+   * of K classes over N examples.  The targets, Y, have K classes,
+   * and are one-hot encoded.
+   *
+   * Inputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   *  - Y: Target matrix, of shape (N, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, Y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(Y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] Y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - Y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  T = 10  # num targets
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=T, pdf="uniform"))
+  Y = table(seq(1, N), classes)  # one-hot encoding
+}
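
Putting the pieces together, a minimal end-to-end sketch of this script's API on the dummy data; the 90/10 train/validation split and all hyperparameter values are illustrative assumptions, not part of this commit:

  # Illustrative end-to-end run on dummy data
  [X, Y, C, Hin, Win] = generate_dummy_data()
  n = nrow(X)
  n_train = as.integer(floor(n * 0.9))  # hypothetical 90/10 split
  val_start = n_train + 1
  X_train = X[1:n_train,]
  Y_train = Y[1:n_train,]
  X_val = X[val_start:n,]
  Y_val = Y[val_start:n,]
  [W, b] = train(X=X_train, Y=Y_train, X_val=X_val, Y_val=Y_val,
                 lr=0.05, mu=0.5, decay=0.95,
                 batch_size=50, epochs=5, log_interval=10)
  probs = predict(X_val, W, b)
  [loss, accuracy] = eval(probs, Y_val)
  print("Val Loss: " + loss + ", Val Accuracy: " + accuracy)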