Repository: systemml
Updated Branches:
  refs/heads/master 56205d025 -> 9970fd814


[SYSTEMML-1437] Factorization Machines: Core Layer Module

This implements the core model of a factorization machine (Rendle 2010)
as a layer in the `nn` library.  The `forward` function accepts the
input data `X` and the parameters `w0`, `W`, and `V`, and returns the FM
output `y`, which is a vector containing a single output for each
example. The `backward` function accepts the upstream gradients w.r.t.
`y` (i.e., `dloss/dy`, the gradient of the loss function w.r.t. `y`),
which is the same shape as `y`, as well as `X`, `w0`, `W`, and `V`, and
returns the gradients of the loss w.r.t. the parameters, i.e., `dw0`,
`dW`, and `dV`. The `init` function accepts the number of examples `n`,
the number of features `d`, and the factorization dimensionality `k`,
and returns initialized `w0`, `W`, and `V` values.
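
For reference, the second-order FM model that `forward` computes (in the
O(d*k) reformulation from Rendle, 2010), together with the per-example
partial derivatives that `backward` combines with the upstream gradient
via the chain rule:

    y(x) = w_0 + \sum_{i=1}^{d} W_i x_i
               + \frac{1}{2} \sum_{f=1}^{k} \Big[ \big(\sum_{i=1}^{d} V_{i,f} x_i\big)^2
                                                 - \sum_{i=1}^{d} V_{i,f}^2 x_i^2 \Big]

    \frac{\partial y}{\partial w_0} = 1, \quad
    \frac{\partial y}{\partial W_i} = x_i, \quad
    \frac{\partial y}{\partial V_{i,f}} = x_i \sum_{j=1}^{d} V_{j,f} x_j - V_{i,f} x_i^2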

In the future, scripts can be written that use this `fm` layer along
with other layer functions to implement factorization machines for
regression, classification, etc.
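
As a concrete illustration, here is a minimal sketch of how the layer
could be wired up with the existing `l2_loss` layer; it mirrors the
gradient check added in `grad_check.dml` below, and the sizes and random
data are illustrative only:

  source("nn/layers/fm.dml") as fm
  source("nn/layers/l2_loss.dml") as l2_loss

  # toy data (illustrative sizes)
  n = 8   # num examples
  d = 16  # num features
  k = 4   # factorization dimensionality
  X = rand(rows=n, cols=d)
  y = rand(rows=n, cols=1)

  # initialize parameters, run the forward pass, and backpropagate an L2 loss
  [w0, W, V] = fm::init(n, d, k)
  out = fm::forward(X, w0, W, V)                   # predictions, shape (n, 1)
  loss = l2_loss::forward(out, y)
  dout = l2_loss::backward(out, y)                 # dloss/dout, shape (n, 1)
  [dw0, dW, dV] = fm::backward(dout, X, w0, W, V)  # parameter gradients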

Closes #696.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/be3c1a6a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/be3c1a6a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/be3c1a6a

Branch: refs/heads/master
Commit: be3c1a6a0bf96429333ed80233d68cac97ad9284
Parents: 56205d0
Author: Janardhan Pulivarthi <[email protected]>
Authored: Wed Jan 31 11:24:01 2018 -0800
Committer: Mike Dusenberry <[email protected]>
Committed: Wed Jan 31 11:24:01 2018 -0800

----------------------------------------------------------------------
 scripts/nn/layers/fm.dml       | 130 ++++++++++++++++++++++++++++++++++++
 scripts/nn/test/grad_check.dml |  81 ++++++++++++++++++++++
 scripts/nn/test/run_tests.dml  |   1 +
 3 files changed, 212 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/be3c1a6a/scripts/nn/layers/fm.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/fm.dml b/scripts/nn/layers/fm.dml
new file mode 100644
index 0000000..17987b2
--- /dev/null
+++ b/scripts/nn/layers/fm.dml
@@ -0,0 +1,130 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Factorization Machines.
+ */
+
+forward = function(matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V)
+    return (matrix[double] out) {
+  /*
+   * Computes the forward pass for a second-order factorization machine.
+   *
+   * Reference:
+   *  - Factorization Machines, Steffen Rendle.
+   *
+   * Inputs:
+   *  - X : n examples with d features, of shape (n, d).
+   *  - w0: the global bias, of shape (1,).
+   *  - W : the strength of each feature, of shape (d, 1).
+   *  - V : factorized interaction terms, of shape (d, k).
+   *
+   * Outputs:
+   *  - out : target vector, of shape (n, 1)
+   */
+
+   out = (X %*% W) + (0.5 * rowSums((X %*% V)^2 - (X^2 %*% V^2))) + w0;  # target vector, shape (n, 1)
+}
+
+backward = function(matrix[double] dout, matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V)
+    return (matrix[double] dw0, matrix[double] dW, matrix[double] dV) {
+
+   /*
+    * Computes the backward pass for the factorization machine, i.e., it
+    * accepts the upstream gradient w.r.t. the output target vector and
+    * returns the gradients of the loss w.r.t. the parameters.
+    *
+    * Inputs:
+    *  - dout : the gradient of the loss function w.r.t. y, of shape (n, 1).
+    *  - X, w0, W, V : as described in the forward function above.
+    *
+    * Outputs:
+    *  - dw0: the gradient of the loss function w.r.t. w0, of shape (1,).
+    *  - dW : the gradient of the loss function w.r.t. W, of shape (d, 1).
+    *  - dV : the gradient of the loss function w.r.t. V, of shape (d, k).
+    */
+    n = nrow(X);
+    d = ncol(X);
+    k = ncol(V);
+
+    # 1. gradient of target vector w.r.t. w0
+    g_w0 = as.matrix(1);  # shape (1, 1)
+
+    ## gradient of loss function w.r.t. w0
+    dw0  = colSums(dout) ;  # shape (1, 1)
+
+    # 2. gradient of target vector w.r.t. W
+    g_W = X ; # shape (n, d)
+
+    ## gradient of loss function w.r.t. W
+    dW  =  t(g_W) %*% dout; # shape (d, 1)
+
+    # 3. gradient of target vector w.r.t. V:
+    #    dy/dV[i,f] = X[,i] * (X %*% V)[,f] - V[i,f] * X[,i]^2
+
+    ## gradient of loss function w.r.t. V (chain rule with dout)
+
+    # second term: V[i,f] * (t(X^2) %*% dout)[i,1]
+    Xt = t(X^2) %*% dout  # shape (d, 1)
+
+    g_V2 = Xt[1,] %*% V[1,]  # shape (1, k)
+    for (i in 2:d) {
+      tmp = Xt[i,] %*% V[i,]
+      g_V2 = rbind(g_V2, tmp)
+    }  # g_V2 has shape (d, k)
+
+    # first term: t(X) %*% (dout * (X %*% V))
+    xv = X %*% V  # shape (n, k)
+
+    g_V1 = dout[,1] * xv[,1]
+    for (j in 2:k) {
+      tmp1 = dout[,1] * xv[,j]
+      g_V1 = cbind(g_V1, tmp1)
+    }  # g_V1 has shape (n, k)
+
+    dV = (t(X) %*% g_V1) - g_V2  # shape (d, k)
+}
+
+init = function(int n, int d, int k)
+    return (matrix[double] w0, matrix[double] W, matrix[double] V) {
+   /*
+    * This function initializes the parameters.
+    *
+    * Inputs:
+    *  - n: the number of examples, is an integer (not used by the initialization).
+    *  - d: the number of features, is an integer.
+    *  - k: the factorization dimensionality, is an integer.
+    *
+    * Outputs:
+    *  - w0: the global bias, of shape (1,).
+    *  - W : the strength of each feature, of shape (d, 1).
+    *  - V : factorized interaction terms, of shape (d, k).
+    */
+
+    w0 = matrix(0, rows=1, cols=1)
+    W  = matrix(0, rows=d, cols=1)
+    V  = rand(rows=d, cols=k, min=0.0, max=1.0, pdf="uniform", sparsity=.08)
+}
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/be3c1a6a/scripts/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index 9c551b8..47c6499 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -34,6 +34,7 @@ source("nn/layers/conv2d_transpose_depthwise.dml") as conv2d_transpose_depthwise
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
 source("nn/layers/cross_entropy_loss2d.dml") as cross_entropy_loss2d
 source("nn/layers/dropout.dml") as dropout
+source("nn/layers/fm.dml") as fm
 source("nn/layers/l1_loss.dml") as l1_loss
 source("nn/layers/l1_reg.dml") as l1_reg
 source("nn/layers/l2_loss.dml") as l2_loss
@@ -1131,6 +1132,86 @@ dropout = function() {
   }
 }
 
+fm = function() {
+  /*
+   * Gradient check for the factorization machines.
+   */
+  print("Grad checking the factorization machines with L2 loss.")
+
+  # Generate data
+  n = 5 # num examples
+  d = 100 # num features
+  k = 2 # factorization dimensionality
+  X = rand(rows=n, cols=d)
+  y = rand(rows=n, cols=1)
+  [w0, W, V] = fm::init(n, d, k)
+
+  # Compute analytical gradients of loss wrt parameters
+  out = fm::forward(X, w0, W, V)
+  dout = l2_loss::backward(out, y)
+  [dw0, dW, dV] = fm::backward(dout, X, w0, W, V)
+
+  # Grad check
+  h = 1e-5
+
+  print(" - Grad checking w0.")
+  for (i in 1:nrow(w0)) {
+    for (j in 1:ncol(w0)) {
+      # Compute numerical derivative
+      old = as.scalar(w0[i,j])
+      w0[i,j] = old - h  # h = 1e-5
+      outmh = fm::forward(X, w0, W, V)
+      lossmh = l2_loss::forward(outmh, y)
+      w0[i,j] = old + h  # h = 1e-5
+      outph = fm::forward(X, w0, W, V)
+      lossph = l2_loss::forward(outph, y)
+      w0[i,j] = old  # reset
+      dw0_num = (lossph-lossmh) / (2*h) # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dw0[i,j]), dw0_num, lossph, lossmh)
+    }
+  }
+
+  print(" - Grad checking W.")
+  for (i in 1:nrow(W)) {
+    for (j in 1:ncol(W)) {
+      # Compute numerical derivative
+      old = as.scalar(W[i,j])
+      W[i,j] = old - h  # h = 1e-5
+      outmh = fm::forward(X, w0, W, V)
+      lossmh = l2_loss::forward(outmh, y)
+      W[i,j] = old + h  # h = 1e-5
+      outph = fm::forward(X, w0, W, V)
+      lossph = l2_loss::forward(outph, y)
+      W[i,j] = old  # reset
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+    }
+  }
+
+  print(" - Grad checking V.")
+  for (i in 1:nrow(V)) {
+    for (j in 1:ncol(V)) {
+      # Compute numerical derivative
+      old = as.scalar(V[i,j])
+      V[i,j] = old - h  # h = 1e-5
+      outmh = fm::forward(X, w0, W, V)
+      lossmh = l2_loss::forward(outmh, y)
+      V[i,j] = old + h  # h = 1e-5
+      outph = fm::forward(X, w0, W, V)
+      lossph = l2_loss::forward(outph, y)
+      V[i,j] = old  # reset
+      dV_num = (lossph-lossmh) / (2*h) # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dV[i,j]), dV_num, lossph, lossmh)
+    }
+  }
+}
+
 l1_loss = function() {
   /*
    * Gradient check for the L1 loss function.

http://git-wip-us.apache.org/repos/asf/systemml/blob/be3c1a6a/scripts/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index f70701b..fd4f0fa 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -51,6 +51,7 @@ grad_check::conv2d_depthwise()
 grad_check::conv2d_transpose()
 grad_check::conv2d_transpose_depthwise()
 grad_check::dropout()
+grad_check::fm()
 grad_check::lstm()
 grad_check::max_pool2d()
 grad_check::max_pool2d_builtin()
