Repository: systemml Updated Branches: refs/heads/master 1f0745c5c -> d5fd4230e
[SYSTEMML-2070] New codegen algorithm tests (linreg/logreg datagen) This patch adds the data generation scripts for linear and logistic regression to the codegen algorithm testsuite. Furthermore, this also includes some minor cleanups of the datagen dml scripts because they also serve as examples. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d5fd4230 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d5fd4230 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d5fd4230 Branch: refs/heads/master Commit: d5fd4230e38ffda0186a8af9a8812e124181f5b9 Parents: 1f0745c Author: Matthias Boehm <[email protected]> Authored: Fri Jan 19 20:07:44 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Fri Jan 19 20:07:44 2018 -0800 ---------------------------------------------------------------------- .../datagen/genRandData4LinearRegression.dml | 4 +- .../datagen/genRandData4LogisticRegression.dml | 20 +- .../functions/codegenalg/AlgorithmDatagen.java | 199 +++++++++++++++++++ .../functions/codegenalg/ZPackageSuite.java | 1 + 4 files changed, 212 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/scripts/datagen/genRandData4LinearRegression.dml ---------------------------------------------------------------------- diff --git a/scripts/datagen/genRandData4LinearRegression.dml b/scripts/datagen/genRandData4LinearRegression.dml index 2ef707f..ebce4f3 100644 --- a/scripts/datagen/genRandData4LinearRegression.dml +++ b/scripts/datagen/genRandData4LinearRegression.dml @@ -47,9 +47,9 @@ X = X * maxFeatureValue w = w * maxWeight Y = X %*% w -if(b!=0) { +if( b != 0 ) { b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform") - w = t(cbind(t(w), b_mat)) + w = rbind(w, t(b_mat)) Y = Y + b } http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/scripts/datagen/genRandData4LogisticRegression.dml ---------------------------------------------------------------------- diff --git a/scripts/datagen/genRandData4LogisticRegression.dml b/scripts/datagen/genRandData4LogisticRegression.dml index fa2bc68..f085093 100644 --- a/scripts/datagen/genRandData4LogisticRegression.dml +++ b/scripts/datagen/genRandData4LogisticRegression.dml @@ -47,25 +47,25 @@ X = X * maxFeatureValue w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0) w = w * maxWeight -ot=X%*%w -if(b!=0) { +ot = X %*% w +if( b != 0) { b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform") - w = t(cbind(t(w), b_mat)) + w = rbind(w, t(b_mat)) ot = ot + b } -prob = 1/(1+exp(-ot)) -if(addNoise == 1){ +prob = 1 / (1 + exp(-ot)) +if( addNoise == 1 ){ r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0) -}else{ +} +else { print("this data generator generates the same dataset for both noise=0 and noise=1") r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0) - #r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform") } + Y = 1 - 2 * (prob < r) -if( $12 == 1 ) { - Y = (Y+3)/2; -} +if( $12 == 1 ) + Y = (Y + 3) / 2 write(w, $5, format=$11) write(X, $6, format=$11) http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java b/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java new file mode 100644 index 0000000..d006276 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.codegenalg; + +import java.io.File; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; + +public class AlgorithmDatagen extends AutomatedTestBase +{ + private final static String TEST_NAME1 = "Algorithm_Datagen"; + private final static String TEST_DIR = "functions/codegenalg/"; + private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmDatagen.class.getSimpleName() + "/"; + private final static String TEST_CONF = "SystemML-config-codegen.xml"; + private final static File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF); + + private final static int rows = 2468; + private final static int cols = 200; + + private final static double sparsity1 = 0.9; //dense + private final static double sparsity2 = 0.1; //sparse + + public enum DatagenType { + LINREG, + LOGREG, + } + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "X","Y","w" })); + } + + @Test + public void testDatagenLinregDenseRewritesCP() { + runStepwiseTest(DatagenType.LINREG, false, true, ExecType.CP); + } + + @Test + public void testDatagenLinregSparseRewritesCP() { + runStepwiseTest(DatagenType.LINREG, true, true, ExecType.CP); + } + + @Test + public void testDatagenLinregDenseNoRewritesCP() { + runStepwiseTest(DatagenType.LINREG, false, false, ExecType.CP); + } + + @Test + public void testDatagenLinregSparseNoRewritesCP() { + runStepwiseTest(DatagenType.LINREG, true, false, ExecType.CP); + } + + @Test + public void testDatagenLogregDenseRewritesCP() { + runStepwiseTest(DatagenType.LOGREG, false, true, ExecType.CP); + } + + @Test + public void testDatagenLogregSparseRewritesCP() { + runStepwiseTest(DatagenType.LOGREG, true, true, ExecType.CP); + } + + @Test + public void testDatagenLogregDenseNoRewritesCP() { + runStepwiseTest(DatagenType.LOGREG, false, false, ExecType.CP); + } + + @Test + public void testDatagenLogregSparseNoRewritesCP() { + runStepwiseTest(DatagenType.LOGREG, true, false, ExecType.CP); + } + + @Test + public void testDatagenLinregDenseRewritesSP() { + runStepwiseTest(DatagenType.LINREG, false, true, ExecType.SPARK); + } + + @Test + public void testDatagenLinregSparseRewritesSP() { + runStepwiseTest(DatagenType.LINREG, true, true, ExecType.SPARK); + } + + @Test + public void testDatagenLinregDenseNoRewritesSP() { + runStepwiseTest(DatagenType.LINREG, false, false, ExecType.SPARK); + } + + @Test + public void testDatagenLinregSparseNoRewritesSP() { + runStepwiseTest(DatagenType.LINREG, true, false, ExecType.SPARK); + } + + @Test + public void testDatagenLogregDenseRewritesSP() { + runStepwiseTest(DatagenType.LOGREG, false, true, ExecType.SPARK); + } + + @Test + public void testDatagenLogregSparseRewritesSP() { + runStepwiseTest(DatagenType.LOGREG, true, true, ExecType.SPARK); + } + + @Test + public void testDatagenLogregDenseNoRewritesSP() { + runStepwiseTest(DatagenType.LOGREG, false, false, ExecType.SPARK); + } + + @Test + public void testDatagenLogregSparseNoRewritesSP() { + runStepwiseTest(DatagenType.LOGREG, true, false, ExecType.SPARK); + } + + private void runStepwiseTest( DatagenType type, boolean sparse, boolean rewrites, ExecType instType) + { + boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; + RUNTIME_PLATFORM platformOld = rtplatform; + switch( instType ){ + case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; + default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; + } + + boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; + if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK ) + DMLScript.USE_LOCAL_SPARK_CONFIG = true; + + OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites; + + try + { + String TEST_NAME = TEST_NAME1; + TestConfiguration config = getTestConfiguration(TEST_NAME); + loadTestConfiguration(config); + + double sparsity = sparse ? sparsity2 : sparsity1; + + if( type == DatagenType.LINREG) { + fullDMLScriptName = "scripts/datagen/genRandData4LinearRegression.dml"; + programArgs = new String[]{ "-explain", "-stats", "-args", + String.valueOf(rows), String.valueOf(cols), "10", "1", output("w"), + output("X"), output("y"), "1", "1", String.valueOf(sparsity), "binary"}; + } + else { //LOGREG + fullDMLScriptName = "scripts/datagen/genRandData4LogisticRegression.dml"; + programArgs = new String[]{ "-explain", "-stats", "-args", + String.valueOf(rows), String.valueOf(cols), "10", "1", output("w"), + output("X"), output("y"), "1", "1", String.valueOf(sparsity), "binary", "1"}; + } + + runTest(true, false, null, -1); + + Assert.assertTrue(heavyHittersContainsSubString("spoof") + || heavyHittersContainsSubString("sp_spoof")); + } + finally { + rtplatform = platformOld; + DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; + OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag; + OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; + OptimizerUtils.ALLOW_OPERATOR_FUSION = true; + } + } + + /** + * Override default configuration with custom test configuration to ensure + * scratch space and local temporary directory locations are also updated. + */ + @Override + protected File getConfigTemplateFile() { + // Instrumentation in this test's output log to show custom configuration file used for template. + System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath()); + return TEST_CONF_FILE; + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java index 3159928..ca45a3c 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java @@ -28,6 +28,7 @@ import org.junit.runners.Suite; @Suite.SuiteClasses({ AlgorithmARIMA.class, AlgorithmAutoEncoder.class, + AlgorithmDatagen.class, AlgorithmGLM.class, AlgorithmKMeans.class, AlgorithmL2SVM.class,
