Repository: incubator-systemml
Updated Branches:
  refs/heads/master 42ebc9620 -> 7c33b7ef8


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3dc8f21c/projects/breast_cancer/softmax_clf.dml
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/softmax_clf.dml b/projects/breast_cancer/softmax_clf.dml
new file mode 100644
index 0000000..e106a36
--- /dev/null
+++ b/projects/breast_cancer/softmax_clf.dml
@@ -0,0 +1,207 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Breast Cancer Softmax Model
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/softmax.dml") as softmax
+#source("nn/optim/adam.dml") as adam
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] Y,
+                 matrix[double] X_val, matrix[double] Y_val,
+                 double lr, double mu, double decay,
+                 int batch_size, int epochs, int log_interval)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Trains a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   * The targets, Y, have K classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - Y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, D).
+   *  - Y_val: Target validation matrix, of shape (N, K).
+   *  - lr: Learning rate.
+   *  - mu: Momentum value.
+   *      Typical values are in the range of [0.5, 0.99], usually
+   *      started at the lower end and annealed towards the higher end.
+   *  - decay: Learning rate decay rate.
+   *  - batch_size: Size of mini-batches to train on.
+   *  - epochs: Total number of full training passes over the data set.
+   *  - log_interval: Interval, in iterations, between log outputs.
+   *
+   * Outputs:
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   */
+  N = nrow(Y)  # num examples
+  D = ncol(X)  # num features
+  K = ncol(Y)  # num classes
+
+  # Create softmax classifier:
+  # affine -> softmax
+  [W, b] = affine::init(D, K)
+  # Rescale W from the He initialization scale used by affine::init,
+  # sqrt(2/D), down to a sqrt(1/D) scale for this linear model.
+  W = W / sqrt(2.0/(D)) * sqrt(1/(D))
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  vW = sgd_nesterov::init(W)  # optimizer momentum state for W
+  vb = sgd_nesterov::init(b)  # optimizer momentum state for b
+  #[mW, vW] = adam::init(W)  # optimizer 1st & 2nd moment state for W
+  #[mb, vb] = adam::init(b)  # optimizer 1st & 2nd moment state for b
+
+  # Starting validation loss & accuracy
+  probs_val = predict(X_val, W, b)
+  loss_val = cross_entropy_loss::forward(probs_val, Y_val)
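+  # rowIndexMax returns the column index of each row's largest entry, i.e.,
+  # the predicted class and the true (one-hot) class, so the mean of the
+  # comparison below is the classification accuracy.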
+  accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(Y_val))
+  # Output results
+  print("Start: Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
+  
+  # Optimize
+  print("Starting optimization")
+  iters = ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for(i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
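+      # e.g., with N = 1024 and batch_size = 32, iter 1 covers rows 1:32,
+      # iter 2 covers rows 33:64, and so on.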
+      #print("Epoch: " + e + ", Iter: " + i + ", X[" + beg + ":" + end + ",]")
+      X_batch = X[beg:end,]
+      Y_batch = Y[beg:end,]
+
+      # Compute forward pass
+      ## affine & softmax:
+      out = affine::forward(X_batch, W, b)
+      probs = softmax::forward(out)
+
+      # Compute backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, Y_batch)
+      ## affine & softmax:
+      dout = softmax::backward(dprobs, out)
+      [dX_batch, dW, db] = affine::backward(dout, X_batch, W, b)
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W, vW] = sgd_nesterov::update(W, dW, lr, mu, vW)
+      [b, vb] = sgd_nesterov::update(b, db, lr, mu, vb)
+      #[W, mW, vW] = adam::update(W, dW, lr, 0.9, 0.999, 1e-8, e*i-1, mW, vW)
+      #[b, mb, vb] = adam::update(b, db, lr, 0.9, 0.999, 1e-8, e*i-1, mb, vb)
+
+      # Compute loss & accuracy for training & validation data every `log_interval` iterations.
+      if (i %% log_interval == 0) {
+        #print("Eval time! - i: " + i)
+        # Compute training loss & accuracy
+        loss = cross_entropy_loss::forward(probs, Y_batch)
+        accuracy = mean(rowIndexMax(probs) == rowIndexMax(Y_batch))
+
+        # Compute validation loss & accuracy
+        probs_val = predict(X_val, W, b)
+        loss_val = cross_entropy_loss::forward(probs_val, Y_val)
+        accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(Y_val))
+
+        # Output results
+        print("Epoch: " + e + "/" + epochs + ", Iter: " + i + "/" + iters
+              + ", Train Loss: " + loss + ", Train Accuracy: " + accuracy + ", 
Val Loss: "
+              + loss_val + ", Val Accuracy: " + accuracy_val + ", lr: " + lr + 
", mu " + mu)
+      }
+    }
+    # Anneal momentum towards 0.999
+    mu = mu + (0.999 - mu)/(1+epochs-e)
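+    # e.g., with epochs = 10, epoch 1 closes 1/10 of the remaining gap to
+    # 0.999, and the final epoch closes it entirely.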
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
+
+predict = function(matrix[double] X, matrix[double] W, matrix[double] b)
+    return (matrix[double] probs) {
+  /*
+   * Computes the class probability predictions of a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   */
+  N = nrow(X)  # num examples
+  K = ncol(W)  # num classes
+  
+  # Compute forward pass
+  ## affine & softmax:
+  out = affine::forward(X, W, b)
+  probs = softmax::forward(out)
+}
+
+eval = function(matrix[double] probs, matrix[double] Y)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a softmax classifier.
+   *
+   * The probs matrix contains the class probability predictions
+   * of K classes over N examples.  The targets, Y, have K classes,
+   * and are one-hot encoded.
+   *
+   * Inputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   *  - Y: Target matrix, of shape (N, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, Y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(Y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] Y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - Y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  T = 10  # num targets
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=T, pdf="uniform"))
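+  # table(seq(1, N), classes) builds a contingency table in which cell
+  # (i, classes[i]) is set to 1, yielding the one-hot target matrix.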
+  Y = table(seq(1, N), classes)  # one-hot encoding
+}
+
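
A minimal end-to-end usage sketch of the functions added above (illustrative
only; the hyperparameter values are placeholders, not tuned settings from this
project):

    [X, Y, C, Hin, Win] = generate_dummy_data()
    [X_val, Y_val, C, Hin, Win] = generate_dummy_data()
    [W, b] = train(X, Y, X_val, Y_val, 0.01, 0.9, 0.95, 32, 5, 10)
    probs = predict(X_val, W, b)
    [loss, accuracy] = eval(probs, Y_val)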
