[SYSTEMML-2121] Add PageRank to staging algorithms and codegen tests Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cad7c1e0 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cad7c1e0 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cad7c1e0
Branch: refs/heads/master Commit: cad7c1e0f292d39907ba569735d6f9258365b166 Parents: d100013 Author: Matthias Boehm <[email protected]> Authored: Thu May 31 21:04:25 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu May 31 21:05:12 2018 -0700 ---------------------------------------------------------------------- scripts/staging/PageRank.dml | 38 ++++ .../sysml/hops/codegen/SpoofCompiler.java | 4 +- .../functions/codegenalg/AlgorithmPageRank.java | 176 +++++++++++++++++++ .../functions/codegenalg/Algorithm_PageRank.R | 38 ++++ .../SystemML-config-codegen-fuse-all.xml | 2 +- ...stemML-config-codegen-fuse-no-redundancy.xml | 2 +- .../codegenalg/SystemML-config-codegen.xml | 2 +- .../functions/codegenalg/ZPackageSuite.java | 1 + 8 files changed, 258 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/scripts/staging/PageRank.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/PageRank.dml b/scripts/staging/PageRank.dml new file mode 100644 index 0000000..a33cf98 --- /dev/null +++ b/scripts/staging/PageRank.dml @@ -0,0 +1,38 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# G: N x M, p: M x 1, e: N x 1: u: 1 x M +# ./sparkDML2.sh SystemML.jar -f PageRank.dml -args "in/g" "in/p" "in/e" "in/u" 0.85 3 "out/w" + +G = read($1); +p = read($2); +e = read($3); +u = read($4); +alpha = $5; +max_iteration = $6; +i = 0; + +while( i < max_iteration ) { + p = alpha * (G %*% p) + (1 - alpha) * (e %*% u %*% p); + i += 1; +} + +write(p, $7); http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java index 368dc94..6a23c8d 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java +++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java @@ -102,10 +102,10 @@ public class SpoofCompiler private static final Log LOG = LogFactory.getLog(SpoofCompiler.class.getName()); //internal configuration flags - public static boolean LDEBUG = false; + public static final boolean LDEBUG = false; public static CompilerType JAVA_COMPILER = CompilerType.JANINO; public static PlanSelector PLAN_SEL_POLICY = PlanSelector.FUSE_COST_BASED_V2; - public static IntegrationType INTEGRATION = IntegrationType.RUNTIME; + public static final IntegrationType INTEGRATION = IntegrationType.RUNTIME; public static final boolean RECOMPILE_CODEGEN = true; public static final boolean PRUNE_REDUNDANT_PLANS = true; public static PlanCachePolicy PLAN_CACHE_POLICY = PlanCachePolicy.CSLH; http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmPageRank.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmPageRank.java b/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmPageRank.java new file mode 100644 index 0000000..9299a77 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmPageRank.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.codegenalg; + +import java.io.File; +import java.util.HashMap; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; + +public class AlgorithmPageRank extends AutomatedTestBase +{ + private final static String TEST_NAME1 = "Algorithm_PageRank"; + private final static String TEST_DIR = "functions/codegenalg/"; + private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmPageRank.class.getSimpleName() + "/"; + private final static String TEST_CONF_DEFAULT = "SystemML-config-codegen.xml"; + private final static File TEST_CONF_FILE_DEFAULT = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF_DEFAULT); + private final static String TEST_CONF_FUSE_ALL = "SystemML-config-codegen-fuse-all.xml"; + private final static File TEST_CONF_FILE_FUSE_ALL = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF_FUSE_ALL); + private final static String TEST_CONF_FUSE_NO_REDUNDANCY = "SystemML-config-codegen-fuse-no-redundancy.xml"; + private final static File TEST_CONF_FILE_FUSE_NO_REDUNDANCY = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF_FUSE_NO_REDUNDANCY); + + private enum TestType { DEFAULT,FUSE_ALL,FUSE_NO_REDUNDANCY } + + //absolute diff for large output scale in the +E12 + private final static double eps = 0.1; + + private final static int rows = 1468; + private final static int cols = 1468; + + private final static double sparsity1 = 0.41; //dense + private final static double sparsity2 = 0.05; //sparse + + private final static double alpha = 0.85; + private final static double maxiter = 10; + + private TestType currentTestType = TestType.DEFAULT; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); + } + + @Test + public void testPageRankDenseCP() { + runPageRankTest(TEST_NAME1, true, false, ExecType.CP, TestType.DEFAULT); + } + + @Test + public void testPageRankSparseCP() { + runPageRankTest(TEST_NAME1, true, true, ExecType.CP, TestType.DEFAULT); + } + + @Test + public void testPageRankDenseCPFuseAll() { + runPageRankTest(TEST_NAME1, true, false, ExecType.CP, TestType.FUSE_ALL); + } + + @Test + public void testPageRankSparseCPFuseAll() { + runPageRankTest(TEST_NAME1, true, true, ExecType.CP, TestType.FUSE_ALL); + } + + @Test + public void testPageRankDenseCPFuseNoRedundancy() { + runPageRankTest(TEST_NAME1, true, false, ExecType.CP, TestType.FUSE_NO_REDUNDANCY); + } + + @Test + public void testPageRankSparseCPFuseNoRedundancy() { + runPageRankTest(TEST_NAME1, true, true, ExecType.CP, TestType.FUSE_NO_REDUNDANCY); + } + + private void runPageRankTest( String testname, boolean rewrites, boolean sparse, ExecType instType, TestType testType) + { + boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; + RUNTIME_PLATFORM platformOld = rtplatform; + switch( instType ){ + case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break; + case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; + default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; + } + currentTestType = testType; + boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; + if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK ) + DMLScript.USE_LOCAL_SPARK_CONFIG = true; + + try + { + String TEST_NAME = testname; + TestConfiguration config = getTestConfiguration(TEST_NAME); + loadTestConfiguration(config); + + fullDMLScriptName = "scripts/staging/PageRank.dml"; + programArgs = new String[]{ "-explain", "-stats", "-args", input("G"), + input("p"), input("e"), input("u"), String.valueOf(alpha), + String.valueOf(maxiter), output("p")}; + rCmd = getRCmd(inputDir(), String.valueOf(alpha), + String.valueOf(maxiter), expectedDir()); + + OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites; + //TODO test both with and without operator fusion + OptimizerUtils.ALLOW_OPERATOR_FUSION = false; + + //generate actual datasets + double[][] G = getRandomMatrix(rows, cols, 1, 1, sparse?sparsity2:sparsity1, 234); + writeInputMatrixWithMTD("G", G, true); + writeInputMatrixWithMTD("p", getRandomMatrix(cols, 1, 0, 1e-14, 1, 71), true); + writeInputMatrixWithMTD("e", getRandomMatrix(rows, 1, 0, 1e-14, 1, 72), true); + writeInputMatrixWithMTD("u", getRandomMatrix(1, cols, 0, 1e-14, 1, 73), true); + + runTest(true, false, null, -1); + runRScript(true); + + //compare matrices + HashMap<CellIndex, Double> dml = readDMLMatrixFromHDFS("p"); + HashMap<CellIndex, Double> r = readRMatrixFromFS("p"); + TestUtils.compareMatrices(dml, r, eps, "Stat-DML", "Stat-R"); + Assert.assertTrue(heavyHittersContainsSubString("spoofRA") + || heavyHittersContainsSubString("sp_spoofRA")); + } + finally { + rtplatform = platformOld; + DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; + OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag; + OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; + OptimizerUtils.ALLOW_OPERATOR_FUSION = true; + } + } + + /** + * Override default configuration with custom test configuration to ensure + * scratch space and local temporary directory locations are also updated. + */ + @Override + protected File getConfigTemplateFile() { + // Instrumentation in this test's output log to show custom configuration file used for template. + String message = "This test case overrides default configuration with "; + if(currentTestType == TestType.FUSE_ALL){ + System.out.println(message + TEST_CONF_FILE_FUSE_ALL.getPath()); + return TEST_CONF_FILE_FUSE_ALL; + } else if(currentTestType == TestType.FUSE_NO_REDUNDANCY){ + System.out.println(message + TEST_CONF_FILE_FUSE_NO_REDUNDANCY.getPath()); + return TEST_CONF_FILE_FUSE_NO_REDUNDANCY; + } else { + System.out.println(message + TEST_CONF_FILE_DEFAULT.getPath()); + return TEST_CONF_FILE_DEFAULT; + } + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/test/scripts/functions/codegenalg/Algorithm_PageRank.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegenalg/Algorithm_PageRank.R b/src/test/scripts/functions/codegenalg/Algorithm_PageRank.R new file mode 100644 index 0000000..27cb7bd --- /dev/null +++ b/src/test/scripts/functions/codegenalg/Algorithm_PageRank.R @@ -0,0 +1,38 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) +library("Matrix") + +G = readMM(paste(args[1], "G.mtx", sep="")); +p = as.matrix(readMM(paste(args[1], "p.mtx", sep=""))); +e = as.matrix(readMM(paste(args[1], "e.mtx", sep=""))); +u = as.matrix(readMM(paste(args[1], "u.mtx", sep=""))); +alpha = as.double(args[2]); +max_iteration = as.integer(args[3]); +i = 0; + +while( i < max_iteration ) { + p = alpha * (G %*% p) + (1 - alpha) * (e %*% (u %*% p)); + i = i + 1; +} + +writeMM(as(p,"CsparseMatrix"), paste(args[4], "p", sep="")); http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-all.xml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-all.xml b/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-all.xml index c2cff96..099025e 100644 --- a/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-all.xml +++ b/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-all.xml @@ -20,7 +20,7 @@ <root> <sysml.localtmpdir>/tmp/systemml</sysml.localtmpdir> <sysml.scratch>scratch_space</sysml.scratch> - <sysml.optlevel>7</sysml.optlevel> + <sysml.optlevel>6</sysml.optlevel> <sysml.codegen.enabled>true</sysml.codegen.enabled> <sysml.codegen.plancache>true</sysml.codegen.plancache> <sysml.codegen.literals>1</sysml.codegen.literals> http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-no-redundancy.xml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-no-redundancy.xml b/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-no-redundancy.xml index 9fd1897..145361e 100644 --- a/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-no-redundancy.xml +++ b/src/test/scripts/functions/codegenalg/SystemML-config-codegen-fuse-no-redundancy.xml @@ -20,7 +20,7 @@ <root> <sysml.localtmpdir>/tmp/systemml</sysml.localtmpdir> <sysml.scratch>scratch_space</sysml.scratch> - <sysml.optlevel>7</sysml.optlevel> + <sysml.optlevel>6</sysml.optlevel> <sysml.codegen.enabled>true</sysml.codegen.enabled> <sysml.codegen.plancache>true</sysml.codegen.plancache> <sysml.codegen.literals>1</sysml.codegen.literals> http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/test/scripts/functions/codegenalg/SystemML-config-codegen.xml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegenalg/SystemML-config-codegen.xml b/src/test/scripts/functions/codegenalg/SystemML-config-codegen.xml index d072ab3..d7b6265 100644 --- a/src/test/scripts/functions/codegenalg/SystemML-config-codegen.xml +++ b/src/test/scripts/functions/codegenalg/SystemML-config-codegen.xml @@ -20,7 +20,7 @@ <root> <sysml.localtmpdir>/tmp/systemml</sysml.localtmpdir> <sysml.scratch>scratch_space</sysml.scratch> - <sysml.optlevel>7</sysml.optlevel> + <sysml.optlevel>6</sysml.optlevel> <sysml.codegen.enabled>true</sysml.codegen.enabled> <sysml.codegen.plancache>true</sysml.codegen.plancache> <sysml.codegen.literals>1</sysml.codegen.literals> http://git-wip-us.apache.org/repos/asf/systemml/blob/cad7c1e0/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java index ca45a3c..7bae898 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java @@ -36,6 +36,7 @@ import org.junit.runners.Suite; AlgorithmMDABivar.class, AlgorithmMLogreg.class, AlgorithmMSVM.class, + AlgorithmPageRank.class, AlgorithmPNMF.class, AlgorithmStepwiseRegression.class, })
