Minor cleanup and formatting of the new Factorization Machines code

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9970fd81
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9970fd81
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9970fd81

Branch: refs/heads/master
Commit: 9970fd8148e835a65ed45d66e7ebaff57af3b01b
Parents: be3c1a6
Author: Mike Dusenberry <[email protected]>
Authored: Wed Jan 31 11:24:13 2018 -0800
Committer: Mike Dusenberry <[email protected]>
Committed: Wed Jan 31 11:24:13 2018 -0800

----------------------------------------------------------------------
 scripts/nn/layers/fm.dml | 132 +++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/9970fd81/scripts/nn/layers/fm.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/fm.dml b/scripts/nn/layers/fm.dml
index 17987b2..228ec41 100644
--- a/scripts/nn/layers/fm.dml
+++ b/scripts/nn/layers/fm.dml
@@ -38,93 +38,93 @@ forward = function(matrix[double] X, matrix[double] w0, matrix[double] W, matrix
    *  - V : factorized interaction terms, of shape (d, k).
    *
    * Outputs:
-   *  - out : target vector, of shape (n, 1)
+   *  - out : target vector, of shape (n, 1).
    */
-
-   out = (X %*% W) + (0.5 * rowSums((X %*% V)^2 - (X^2 %*% V^2)) ) + w0; # target vector, shape (n, 1)
+  out = (X %*% W) + (0.5 * rowSums((X %*% V)^2 - (X^2 %*% V^2)) ) + w0  # shape (n, 1)
 }
 
-backward = function(matrix[double] dout, matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V)
+backward = function(matrix[double] dout, matrix[double] X, matrix[double] w0, matrix[double] W,
+                    matrix[double] V)
     return (matrix[double] dw0, matrix[double] dW, matrix[double] dV) {
+  /*
+   * This function accepts the upstream gradients w.r.t. output target
+   * vector, and returns the gradients of the loss w.r.t. the
+   * parameters.
+   *
+   * Inputs:
+   *  - dout : the gradient of the loss function w.r.t y, of
+   *     shape (n, 1).
+   *  - X, w0, W, V are as mentioned in the above forward function.
+   *
+   * Outputs:
+   *  - dX : the gradient of loss function w.r.t  X, of shape (n, d).
+   *  - dw0: the gradient of loss function w.r.t w0, of shape (1,).
+   *  - dW : the gradient of loss function w.r.t  W, of shape (d, 1).
+   *  - dV : the gradient of loss function w.r.t  V, of shape (d, k).
+   */
+  n = nrow(X)
+  d = ncol(X)
+  k = ncol(V)
 
-   /*
-    * This function accepts the upstream gradients w.r.t output target vector, 
and
-    * returns the gradients of the loss w.r.t the parameters
-    *
-    * Inputs:
-    *  - dout : the gradient of the loss function w.r.t y, of shape (n, 1).
-    *  - X, w0, W, V are as mentioned in the above forward function.
-    *
-    * Outputs:
-    *  - dX : the gradient of loss function w.r.t  X, of shape (n, d).
-    *  - dw0: the gradient of loss function w.r.t w0, of shape (1,).
-    *  - dW : the gradient of loss function w.r.t  W, of shape (d, 1).
-    *  - dV : the gradient of loss function w.r.t  V, of shape (d, k).
-    */
-    n = nrow(X);
-    d = ncol(X);
-    k = ncol(V);
-
-    # 1. gradient of target vector w.r.t. w0
-    g_w0 = as.matrix(1);  # shape (1, 1)
-
-    ## gradient of loss function w.r.t. w0
-    dw0  = colSums(dout) ;  # shape (1, 1)
+  # 1. gradient of target vector w.r.t. w0
+  g_w0 = as.matrix(1)  # shape (1, 1)
 
-    # 2. gradient target vector w.r.t. W
-    g_W = X ; # shape (n, d)
+  ## gradient of loss function w.r.t. w0
+  dw0  = colSums(dout)  # shape (1, 1)
 
-    ## gradient of loss function w.r.t. W
-    dW  =  t(g_W) %*% dout; # shape (d, 1)
+  # 2. gradient target vector w.r.t. W
+  g_W = X  # shape (n, d)
 
-    # 3. gradient of target vector w.r.t. V
-    # First term -> g_V1 = t(X) %*% (X %*% V); # shape (d, k)
+  ## gradient of loss function w.r.t. W
+  dW  =  t(g_W) %*% dout  # shape (d, 1)
 
-    ## gradient of loss function w.r.t. V
-    # First term -> t(X) %*% X %*% V
+  # TODO: VECTORIZE THE FOLLOWING CODE (https://issues.apache.org/jira/browse/SYSTEMML-2102)
+  # 3. gradient of target vector w.r.t. V
+  # First term -> g_V1 = t(X) %*% (X %*% V)  # shape (d, k)
 
+  ## gradient of loss function w.r.t. V
+  # First term -> t(X) %*% X %*% V
 
-    # Second term -> V(i,f) * (X(i))^2
-    Xt = t( X^2 ) %*% dout # of shape (d,1)
 
-    g_V2 = Xt[1,] %*% V[1,]
+  # Second term -> V(i,f) * (X(i))^2
+  Xt = t( X^2 ) %*% dout  # shape (d,1)
 
-    for (i in 2:d) {
-      tmp = Xt[i,] %*% V[i,]
-      g_V2 = rbind(g_V2, tmp)
-    }
+  g_V2 = Xt[1,] %*% V[1,]
 
-    xv = X %*% V
+  for (i in 2:d) {
+    tmp = Xt[i,] %*% V[i,]
+    g_V2 = rbind(g_V2, tmp)
+  }
 
-    g_V1 = dout[,1] * xv[,1]
+  xv = X %*% V
 
-    for (j in 2:k) {
-      tmp1 = dout[,1] * xv[,k]
-      g_V1 = cbind(g_V1, tmp1)
-    }
+  g_V1 = dout[,1] * xv[,1]
 
-    dV = (t(X) %*% g_V1) - g_V2
-    # dV = mean(dout) * (t(X) %*% X %*%V) - g_V2
+  for (j in 2:k) {
+    tmp1 = dout[,1] * xv[,k]
+    g_V1 = cbind(g_V1, tmp1)
+  }
 
+  dV = (t(X) %*% g_V1) - g_V2
+  # dV = mean(dout) * (t(X) %*% X %*%V) - g_V2
 }
 
 init = function(int n, int d, int k)
     return (matrix[double] w0, matrix[double] W, matrix[double] V) {
-   /*
-    * This function initializes the parameters.
-    *
-    * Inputs:
-    *  - d: the number of features, is an integer.
-    *  - k: the factorization dimensionality, is an integer.
-    *
-    * Outputs:
-    *  - w0: the global bias, of shape (1,).
-    *  - W : the strength of each feature, of shape (d, 1).
-    *  - V : factorized interaction terms, of shape (d, k).
-    */
-
-    w0 = matrix(0, rows=1, cols=1)
-    W  = matrix(0, rows=d, cols=1)
-    V  = rand(rows=d, cols=k, min=0.0, max=1.0, pdf="uniform", sparsity=.08)
+  /*
+   * This function initializes the parameters.
+   *
+   * Inputs:
+   *  - d: the number of features, is an integer.
+   *  - k: the factorization dimensionality, is an integer.
+   *
+   * Outputs:
+   *  - w0: the global bias, of shape (1,).
+   *  - W : the strength of each feature, of shape (d, 1).
+   *  - V : factorized interaction terms, of shape (d, k).
+   */
+  w0 = matrix(0, rows=1, cols=1)
+  W  = matrix(0, rows=d, cols=1)
+  V  = rand(rows=d, cols=k, min=0.0, max=1.0, pdf="uniform", sparsity=.08)
 }
 

Reply via email to