This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 4fb8ad1  [SYSTEMDS-2804] New built-in functions ALS-predict, 
ALS-topk-predict
4fb8ad1 is described below

commit 4fb8ad1eed144db052640f021dd3e460f0898328
Author: gabrielaozegovic <[email protected]>
AuthorDate: Sat Jan 23 22:16:47 2021 +0100

    [SYSTEMDS-2804] New built-in functions ALS-predict, ALS-topk-predict
    
    DIA project WS2020/21.
    Closes #1162.
    
    Co-Authored-By: Sven Celin <[email protected]>
---
 scripts/builtin/alsPredict.dml                     | 58 +++++++++++++++++
 scripts/builtin/alsTopkPredict.dml                 | 63 ++++++++++++++++++
 .../java/org/apache/sysds/common/Builtins.java     |  2 +
 .../functions/builtin/BuiltinALSPredictTest.java   | 76 ++++++++++++++++++++++
 src/test/scripts/functions/builtin/alsPredict.dml  | 26 ++++++++
 .../scripts/functions/builtin/alsTopkPredict.dml   | 26 ++++++++
 6 files changed, 251 insertions(+)

diff --git a/scripts/builtin/alsPredict.dml b/scripts/builtin/alsPredict.dml
new file mode 100644
index 0000000..1cbd571
--- /dev/null
+++ b/scripts/builtin/alsPredict.dml
@@ -0,0 +1,58 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script computes the rating/scores for a given list of userIDs 
+# using 2 factor matrices L and R. We assume that all users have rated
+# at least once and all items have been rated at least once.
+#
+# INPUT   PARAMETERS:
+# 
---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT  MEANING
+# 
---------------------------------------------------------------------------------------------
+# userIDs Matrix   ---      Column vector of user-ids (n x 1)
+# I       Matrix   ---      Indicator matrix user-id x item-id to exclude from scoring
+# L       Matrix   ---      The factor matrix L: user-id x feature-id
+# R       Matrix   ---      The factor matrix R: feature-id x item-id
+# 
---------------------------------------------------------------------------------------------
+# OUTPUT:
+# Y       Matrix   ---      The predicted scores: selected users (rows) x item-ids (cols), 0 where excluded
+
+m_alsPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R)
+  return (Matrix[Double] Y)
+{
+  n = nrow(userIDs)
+  X_user_max = max(userIDs);
+
+  if (X_user_max > nrow(L))
+    stop ("Predictions cannot be provided. Maximum user-id exceeds the number of users.");
+  if (ncol(L) != nrow(R))
+    stop ("Predictions cannot be provided. Number of columns of L don't match the number of rows of R.");
+
+  # creates the n x nrow(L) projection matrix with P[i, userIDs[i]] = 1 to select users
+  P = table(seq(1,n), userIDs, n, nrow(L));
+
+  # selects the rows of factor L and of the exclude list I for the requested users
+  Usel = P %*% L;
+  Isel = P %*% I;
+
+  # calculates scores for selected users; entries flagged in the exclude list (non-zero) become 0
+  Y = (Isel == 0) * (Usel %*% R);
+}
diff --git a/scripts/builtin/alsTopkPredict.dml 
b/scripts/builtin/alsTopkPredict.dml
new file mode 100644
index 0000000..de6c9ce
--- /dev/null
+++ b/scripts/builtin/alsTopkPredict.dml
@@ -0,0 +1,63 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script computes the top-K rating/scores for a given list of userIDs 
+# using 2 factor matrices L and R. We assume that all users have rated
+# at least once and all items have been rated at least once.
+#
+# INPUT   PARAMETERS:
+# 
---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT  MEANING
+# 
---------------------------------------------------------------------------------------------
+# userIDs Matrix   ---      Column vector of user-ids (n x 1)
+# I       Matrix   ---      Indicator matrix user-id x item-id to exclude from scoring
+# L       Matrix   ---      The factor matrix L: user-id x feature-id
+# R       Matrix   ---      The factor matrix R: feature-id x item-id
+# K       Int      5        The number of top-K items
+# 
---------------------------------------------------------------------------------------------
+# OUTPUT:
+# TopIxs  Matrix   ---      A matrix containing the top-K item-ids with 
highest predicted ratings 
+#                           for the specified users (rows)
+# TopVals Matrix   ---      A matrix containing the top-K predicted ratings 
for the specified users (rows)
+
+m_alsTopkPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R, Integer K = 5)
+  return (Matrix[Double] TopIxs, Matrix[Double] TopVals)
+{
+  zero_cols_ind = (colSums (R != 0)) == 0;
+  K = min (ncol(R) - sum (zero_cols_ind), K);  # cap K at the number of items with a non-zero column in R
+
+  Y = alsPredict(userIDs=userIDs, I=I, L=L, R=R)
+
+  # result buffers: one row per requested user, one column per rank 1..K
+  TopIxs = matrix(0, rows = nrow (userIDs), cols = K);
+  TopVals = matrix(0, rows = nrow (userIDs), cols = K);
+
+  # uses rowIndexMax/rowMaxs to extract the kth best rating for all users (assumes no duplicates),
+  # then zeroes the selected entry in Y (mask == 0) so the next iteration finds the next-best item
+  for (i in 1:K) {
+    TopIxs[,i] = rowIndexMax(Y);
+    TopVals[,i] = rowMaxs(Y);
+    Y = Y * (table(seq(1,nrow(Y)), rowIndexMax(Y), nrow(Y), ncol(Y)) == 0);
+  }
+
+  # post-processing to handle edge cases: ranks without a positive score get item-id 0
+  TopIxs = TopIxs * (TopVals > 0);
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java 
b/src/main/java/org/apache/sysds/common/Builtins.java
index 03b9afd..5010c21 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -46,6 +46,8 @@ public enum Builtins {
        ALS("als", true),
        ALS_CG("alsCG", true),
        ALS_DS("alsDS", true),
+       ALS_PREDICT("alsPredict", true),
+       ALS_TOPK_PREDICT("alsTopkPredict", true),
        ASIN("asin", false),
        ATAN("atan", false),
        AUTOENCODER2LAYER("autoencoder_2layer", true),
diff --git 
a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinALSPredictTest.java
 
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinALSPredictTest.java
new file mode 100644
index 0000000..cfb5c7e
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinALSPredictTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class BuiltinALSPredictTest extends AutomatedTestBase { // runs the alsPredict and alsTopkPredict test scripts on small fixed inputs
+       private final static String TEST_NAME1 = "alsPredict";
+       private final static String TEST_NAME2 = "alsTopkPredict";
+       private final static String TEST_DIR = "functions/builtin/";
+       private static final String TEST_CLASS_DIR = TEST_DIR + 
BuiltinALSPredictTest.class.getSimpleName() + "/";
+
+       @Override
+       public void setUp() { // registers one configuration per script; NOTE(review): declared output "B" differs from output("Y") used below - confirm
+               addTestConfiguration(TEST_NAME1,new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"B"}));
+               addTestConfiguration(TEST_NAME2,new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[]{"B"}));
+       }
+
+       @Test
+       public void testALSPredict() {
+               runtestALSPredict(TEST_NAME1);
+       }
+       
+       @Test
+       public void testALSTopkPredict() {
+               runtestALSPredict(TEST_NAME2);
+       }
+
+       private void runtestALSPredict(String testname) { // shared driver: writes inputs X, L, R and runs <testname>.dml
+               loadTestConfiguration(getTestConfiguration(testname));
+               String HOME = SCRIPT_DIR + TEST_DIR;
+               fullDMLScriptName = HOME + testname + ".dml";
+               List<String> proArgs = new ArrayList<>();
+
+               proArgs.add("-stats");
+               proArgs.add("-args"); // the four entries after "-args" are read by the scripts as $1..$4
+               proArgs.add(input("X"));
+               proArgs.add(input("L"));
+               proArgs.add(input("R"));
+               proArgs.add(output("Y"));
+               programArgs = proArgs.toArray(new String[proArgs.size()]);
+
+               double[][] X = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}; // NOTE(review): 5x2, but the scripts pass X as userIDs, documented as n x 1 - confirm intended
+               writeInputMatrixWithMTD("X", X, true);
+
+               double[][] L = {{1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 
5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}};
+               writeInputMatrixWithMTD("L", L, true);
+
+               double[][] R = {{1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 
5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}};
+               writeInputMatrixWithMTD("R", R, true);
+
+               runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); // no result comparison follows; presumably only checks the run does not throw - confirm against AutomatedTestBase
+       }
+}
diff --git a/src/test/scripts/functions/builtin/alsPredict.dml 
b/src/test/scripts/functions/builtin/alsPredict.dml
new file mode 100644
index 0000000..3da84ec
--- /dev/null
+++ b/src/test/scripts/functions/builtin/alsPredict.dml
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1)  # passed to the builtin as userIDs
+L = read($2)  # passed to the builtin as factor matrix L
+R = read($3)  # passed to the builtin as factor matrix R
+Y = alsPredict(userIDs=X, I=matrix(0,nrow(L),ncol(R)), L=L, R=R)  # I is an all-zero nrow(L) x ncol(R) matrix
+write(Y, $4)  # writes the builtin's result Y to the path given as $4
\ No newline at end of file
diff --git a/src/test/scripts/functions/builtin/alsTopkPredict.dml 
b/src/test/scripts/functions/builtin/alsTopkPredict.dml
new file mode 100644
index 0000000..2ae995c
--- /dev/null
+++ b/src/test/scripts/functions/builtin/alsTopkPredict.dml
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1)  # passed to the builtin as userIDs
+L = read($2)  # passed to the builtin as factor matrix L
+R = read($3)  # passed to the builtin as factor matrix R
+[TopIxs, TopVals] = alsTopkPredict(userIDs=X, I=matrix(0,nrow(L),ncol(R)), 
L=L, R=R)  # I is an all-zero nrow(L) x ncol(R) matrix; K is not passed
+write(TopVals, $4)  # only TopVals is written; TopIxs is not persisted
\ No newline at end of file

Reply via email to