This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 4fb8ad1 [SYSTEMDS-2804] New built-in functions ALS-predict,
ALS-topk-predict
4fb8ad1 is described below
commit 4fb8ad1eed144db052640f021dd3e460f0898328
Author: gabrielaozegovic <[email protected]>
AuthorDate: Sat Jan 23 22:16:47 2021 +0100
[SYSTEMDS-2804] New built-in functions ALS-predict, ALS-topk-predict
DIA project WS2020/21.
Closes #1162.
Co-Authored-By: Sven Celin <[email protected]>
---
scripts/builtin/alsPredict.dml | 58 +++++++++++++++++
scripts/builtin/alsTopkPredict.dml | 63 ++++++++++++++++++
.../java/org/apache/sysds/common/Builtins.java | 2 +
.../functions/builtin/BuiltinALSPredictTest.java | 76 ++++++++++++++++++++++
src/test/scripts/functions/builtin/alsPredict.dml | 26 ++++++++
.../scripts/functions/builtin/alsTopkPredict.dml | 26 ++++++++
6 files changed, 251 insertions(+)
diff --git a/scripts/builtin/alsPredict.dml b/scripts/builtin/alsPredict.dml
new file mode 100644
index 0000000..1cbd571
--- /dev/null
+++ b/scripts/builtin/alsPredict.dml
@@ -0,0 +1,58 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script computes the rating/scores for a given list of userIDs
+# using 2 factor matrices L and R. We assume that all users have rated
+# at least once and all items have been rated at least once.
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# userIDs   Matrix   ---      Column vector of user-ids (n x 1)
+# I         Matrix   ---      Indicator matrix user-id x item-id to exclude from scoring
+# L         Matrix   ---      The factor matrix L: user-id x feature-id
+# R         Matrix   ---      The factor matrix R: feature-id x item-id
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# Y Matrix --- The output user-id/item-id/score
+
+m_alsPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R)
+  return (Matrix[Double] Y)
+{
+  n = nrow(userIDs)
+  X_user_max = max(userIDs);
+
+  # every requested user-id is used as a row position of L, so it must not exceed nrow(L)
+  if (X_user_max > nrow(L))
+    stop ("Predictions cannot be provided. Maximum user-id exceeds the number of users.");
+  # the product of L and R requires the feature dimensions to agree: ncol(L) == nrow(R)
+  if (ncol(L) != nrow(R))
+    stop ("Predictions cannot be provided. Number of columns of L does not match the number of rows of R.");
+
+  # creates projection matrix to select users
+  # (n x nrow(L), row i carries a single 1 in column userIDs[i])
+  P = table(seq(1,n), userIDs, n, nrow(L));
+
+  # selects users from factor L and exclude list
+  Usel = P %*% L;
+  Isel = P %*% I;
+
+  # calculates scores for selected users and filter exclude list
+  # (entries with a non-zero indicator in I are set to 0)
+  Y = (Isel == 0) * (Usel %*% R);
+}
diff --git a/scripts/builtin/alsTopkPredict.dml
b/scripts/builtin/alsTopkPredict.dml
new file mode 100644
index 0000000..de6c9ce
--- /dev/null
+++ b/scripts/builtin/alsTopkPredict.dml
@@ -0,0 +1,63 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script computes the top-K rating/scores for a given list of userIDs
+# using 2 factor matrices L and R. We assume that all users have rated
+# at least once and all items have been rated at least once.
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# userIDs   Matrix   ---      Column vector of user-ids (n x 1)
+# I         Matrix   ---      Indicator matrix user-id x item-id to exclude from scoring
+# L         Matrix   ---      The factor matrix L: user-id x feature-id
+# R         Matrix   ---      The factor matrix R: feature-id x item-id
+# K         Int      5        The number of top-K items
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# TopIxs    Matrix   ---      A matrix containing the top-K item-ids with highest predicted ratings
+#                             for the specified users (rows)
+# TopVals   Matrix   ---      A matrix containing the top-K predicted ratings for the specified users (rows)
+
+m_alsTopkPredict = function(Matrix[Double] userIDs, Matrix[Double] I, Matrix[Double] L, Matrix[Double] R, Integer K = 5)
+  return (Matrix[Double] TopIxs, Matrix[Double] TopVals)
+{
+  # caps K at the number of columns of R that contain at least one non-zero value
+  zero_cols_ind = (colSums (R != 0)) == 0;
+  K = min (ncol(R) - sum (zero_cols_ind), K);
+
+  # predicted scores, one row per entry of userIDs and one column per item
+  Y = alsPredict(userIDs=userIDs, I=I, L=L, R=R)
+
+  # stores sorted movies for selected users
+  TopIxs = matrix(0, rows = nrow (userIDs), cols = K);
+  TopVals = matrix(0, rows = nrow (userIDs), cols = K);
+
+  # uses rowIndexMax/rowMaxs to update kth ratings for all users (assumes no duplicates)
+  # (alternatively, we could sort the scores per user, but likely nrow(userIDs)>>K)
+  for (i in 1:K) {
+    maxIx = rowIndexMax(Y);
+    TopIxs[,i] = maxIx;
+    TopVals[,i] = rowMaxs(Y);
+    # removes only the maximum just recorded in each row so that the next
+    # iteration finds the next-best item: the mask has to be 0 at the
+    # position of the maximum and 1 everywhere else
+    Y = Y * (table(seq(1,nrow(Y)), maxIx, nrow(Y), ncol(Y)) == 0);
+  }
+
+  # post-processing to handle edge cases
+  # (item-ids whose recorded score is not positive are reported as 0)
+  TopIxs = TopIxs * (TopVals > 0);
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java
b/src/main/java/org/apache/sysds/common/Builtins.java
index 03b9afd..5010c21 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -46,6 +46,8 @@ public enum Builtins {
ALS("als", true),
ALS_CG("alsCG", true),
ALS_DS("alsDS", true),
+ ALS_PREDICT("alsPredict", true),
+ ALS_TOPK_PREDICT("alsTopkPredict", true),
ASIN("asin", false),
ATAN("atan", false),
AUTOENCODER2LAYER("autoencoder_2layer", true),
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinALSPredictTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinALSPredictTest.java
new file mode 100644
index 0000000..cfb5c7e
--- /dev/null
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinALSPredictTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Smoke test for the DML built-ins alsPredict and alsTopkPredict: writes small
+ * input matrices, runs the matching test script and only requires that the
+ * run finishes without an exception (no result comparison is performed).
+ */
+public class BuiltinALSPredictTest extends AutomatedTestBase {
+	private static final String TEST_NAME1 = "alsPredict";
+	private static final String TEST_NAME2 = "alsTopkPredict";
+	private static final String TEST_DIR = "functions/builtin/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinALSPredictTest.class.getSimpleName() + "/";
+
+	@Override
+	public void setUp() {
+		// NOTE(review): the configurations declare output "B" while the run below
+		// passes output("Y") to the scripts -- confirm this mismatch is intended
+		for (String name : new String[]{TEST_NAME1, TEST_NAME2})
+			addTestConfiguration(name, new TestConfiguration(TEST_CLASS_DIR, name, new String[]{"B"}));
+	}
+
+	@Test
+	public void testALSPredict() {
+		runtestALSPredict(TEST_NAME1);
+	}
+
+	@Test
+	public void testALSTopkPredict() {
+		runtestALSPredict(TEST_NAME2);
+	}
+
+	/** Runs the script named {@code testname}.dml with inputs X, L, R and output Y. */
+	private void runtestALSPredict(String testname) {
+		loadTestConfiguration(getTestConfiguration(testname));
+		fullDMLScriptName = SCRIPT_DIR + TEST_DIR + testname + ".dml";
+
+		List<String> cliArgs = new ArrayList<>();
+		cliArgs.add("-stats");
+		cliArgs.add("-args");
+		cliArgs.add(input("X"));
+		cliArgs.add(input("L"));
+		cliArgs.add(input("R"));
+		cliArgs.add(output("Y"));
+		programArgs = cliArgs.toArray(new String[cliArgs.size()]);
+
+		// NOTE(review): X has two columns although the script passes it as userIDs,
+		// which alsPredict documents as an n x 1 column vector -- confirm
+		double[][] X = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}};
+		writeInputMatrixWithMTD("X", X, true);
+
+		// both factor matrices are 5 x 5 with every row equal to 1..5
+		writeInputMatrixWithMTD("L", oneToFiveRows(5), true);
+		writeInputMatrixWithMTD("R", oneToFiveRows(5), true);
+
+		runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+	}
+
+	/** Builds a fresh matrix with {@code rows} rows, each holding the values 1..5. */
+	private static double[][] oneToFiveRows(int rows) {
+		double[][] m = new double[rows][];
+		for (int r = 0; r < rows; r++)
+			m[r] = new double[]{1, 2, 3, 4, 5};
+		return m;
+	}
+}
diff --git a/src/test/scripts/functions/builtin/alsPredict.dml
b/src/test/scripts/functions/builtin/alsPredict.dml
new file mode 100644
index 0000000..3da84ec
--- /dev/null
+++ b/src/test/scripts/functions/builtin/alsPredict.dml
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Test driver for alsPredict: $1 = user-ids, $2 = factor L, $3 = factor R, $4 = output scores
+ids = read($1);
+facL = read($2);
+facR = read($3);
+# all-zero exclude indicator, i.e., no user/item pair is filtered from scoring
+noExclude = matrix(0, rows=nrow(facL), cols=ncol(facR));
+scores = alsPredict(userIDs=ids, I=noExclude, L=facL, R=facR);
+write(scores, $4);
\ No newline at end of file
diff --git a/src/test/scripts/functions/builtin/alsTopkPredict.dml
b/src/test/scripts/functions/builtin/alsTopkPredict.dml
new file mode 100644
index 0000000..2ae995c
--- /dev/null
+++ b/src/test/scripts/functions/builtin/alsTopkPredict.dml
@@ -0,0 +1,26 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Test driver for alsTopkPredict: $1 = user-ids, $2 = factor L, $3 = factor R, $4 = output top-K values
+ids = read($1);
+facL = read($2);
+facR = read($3);
+# all-zero exclude indicator, i.e., no user/item pair is filtered from scoring
+noExclude = matrix(0, rows=nrow(facL), cols=ncol(facR));
+[bestIxs, bestVals] = alsTopkPredict(userIDs=ids, I=noExclude, L=facL, R=facR);
+# only the predicted ratings are written; the item-ids are not persisted
+write(bestVals, $4);
\ No newline at end of file