This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 6085c4b869 [SYSTEMDS-3329] PageRank builtin function
6085c4b869 is described below
commit 6085c4b869d708705904a3e88fab36efe2fbffe2
Author: MarcNic <[email protected]>
AuthorDate: Sat Mar 16 18:41:21 2024 +0100
[SYSTEMDS-3329] PageRank builtin function
Closes #1904.
---
scripts/builtin/pageRank.dml | 50 ++++++++++++++++++++++
.../java/org/apache/sysds/common/Builtins.java | 1 +
.../sysds/test/applications/PageRankTest.java | 4 +-
.../scripts/applications/page_rank/PageRank.dml | 17 ++------
.../functions/codegen/SystemDS-config-codegen.xml | 2 +-
5 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/scripts/builtin/pageRank.dml b/scripts/builtin/pageRank.dml
new file mode 100644
index 0000000000..bd024e77b2
--- /dev/null
+++ b/scripts/builtin/pageRank.dml
@@ -0,0 +1,50 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# DML builtin method for PageRank algorithm (power iterations)
+#
+# INPUT:
+# ------------------------------------------------------------------------------
+# G Input Matrix
+# p initial page rank vector (number of nodes), e.g., rand initialized
+# e additional customization, default vector of ones
+# u personalization vector (number of nodes)
+# alpha teleport probability
+# max_iter maximum number of iterations
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------
+# pprime computed pagerank
+# ---------------------------------------------------------------------------
+
+m_pageRank = function (Matrix[Double] G, Matrix[Double] p,
+ Matrix[Double] e, Matrix[Double] u, Double alpha = 0.85, Int max_iter = 20)
+ return (Matrix[double] pprime)
+{
+ i = 0; # iteration counter
+ while( i < max_iter ) { # runs exactly max_iter updates; no convergence check
+ p = alpha * (G %*% p) + (1 - alpha) * (e %*% u %*% p); # alpha-weighted G %*% p plus (1 - alpha)-weighted e %*% u %*% p
+ i += 1;
+ }
+ pprime = p # return the vector obtained after the last update
+}
+
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index 3dae7a80ae..4d0e13791f 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -253,6 +253,7 @@ public enum Builtins {
OUTLIER_IQR_APPLY("outlierByIQRApply", true),
OUTLIER_SD("outlierBySd", true),
OUTLIER_SD_APPLY("outlierBySdApply", true),
+ PAGERANK("pageRank", true),
PCA("pca", true),
PCAINVERSE("pcaInverse", true),
PCATRANSFORM("pcaTransform", true),
diff --git a/src/test/java/org/apache/sysds/test/applications/PageRankTest.java b/src/test/java/org/apache/sysds/test/applications/PageRankTest.java
index ed322f00a9..d606243ac0 100644
--- a/src/test/java/org/apache/sysds/test/applications/PageRankTest.java
+++ b/src/test/java/org/apache/sysds/test/applications/PageRankTest.java
@@ -49,7 +49,7 @@ public class PageRankTest extends AutomatedTestBase {
@Parameters
public static Collection<Object[]> data() {
- Object[][] data = new Object[][] { { 50, 50 }, { 1500, 1500 }, { 7500, 7500 } };
+ Object[][] data = new Object[][] { { 50, 50 }, { 1500, 1500 }, { 7500, 7500 }, {10000,10000} };
return Arrays.asList(data);
}
@@ -83,7 +83,7 @@ public class PageRankTest extends AutomatedTestBase {
fullDMLScriptName = getScript();
- double[][] g = getRandomMatrix(rows, cols, 1, 1, 0.000374962, -1);
+ double[][] g = getRandomMatrix(rows, cols, 1, 1, 0.0000042159, -1);
double[][] p = getRandomMatrix(rows, 1, 1, 1, 1, -1);
double[][] e = getRandomMatrix(rows, 1, 1, 1, 1, -1);
double[][] u = getRandomMatrix(1, cols, 1, 1, 1, -1);
diff --git a/src/test/scripts/applications/page_rank/PageRank.dml b/src/test/scripts/applications/page_rank/PageRank.dml
index b3293cf9f6..cf974f25e7 100644
--- a/src/test/scripts/applications/page_rank/PageRank.dml
+++ b/src/test/scripts/applications/page_rank/PageRank.dml
@@ -19,25 +19,14 @@
#
#-------------------------------------------------------------
-# How to invoke this dml script PageRank.dml?
-# Assume PAGE_RANK_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume rows = 1000 and cols = 1000 for g, rows = 1000 and cols = 1 for p, rows = 1000 and cols = 1 for e, rows = 1 and cols = 1000 for u,
-# Assume alpha = 0.85, max_iteration = 3
-# hadoop jar SystemDS.jar -f $PAGE_RANK_HOME/PageRank.dml -args "$INPUT_DIR/g" "$INPUT_DIR/p" "$INPUT_DIR/e" "$INPUT_DIR/u" 0.85 3 "$OUPUT_DIR/w"
-
G = read($1);
-# dense
p = read($2);
e = read($3);
u = read($4);
alpha = $5;
-max_iteration = $6;
-i = 0;
+maxi = $6;
-while(i < max_iteration) {
- p = alpha * (G %*% p) + (1 - alpha) * (e %*% u %*% p);
- i = i + 1;
-}
+p = pageRank(G=G, p=p, e=e, u=u, alpha=alpha, max_iter=maxi);
write(p, $7, format="text");
+
diff --git a/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml b/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml
index f77d94dd4e..0bc25e014f 100644
--- a/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml
+++ b/src/test/scripts/functions/codegen/SystemDS-config-codegen.xml
@@ -25,7 +25,7 @@
<sysds.codegen.plancache>true</sysds.codegen.plancache>
<sysds.codegen.literals>1</sysds.codegen.literals>
- <!-- The number of theads for the spark instance artificially selected-->
+ <!-- The number of threads for the spark instance artificially selected-->
<sysds.local.spark.number.threads>16</sysds.local.spark.number.threads>
<sysds.codegen.api>auto</sysds.codegen.api>