Repository: systemml
Updated Branches:
  refs/heads/master 1f0745c5c -> d5fd4230e


[SYSTEMML-2070] New codegen algorithm tests (linreg/logreg datagen)

This patch adds the data generation scripts for linear and logistic
regression to the codegen algorithm testsuite. It additionally includes
minor cleanups of the datagen DML scripts, because these scripts also
serve as examples.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d5fd4230
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d5fd4230
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d5fd4230

Branch: refs/heads/master
Commit: d5fd4230e38ffda0186a8af9a8812e124181f5b9
Parents: 1f0745c
Author: Matthias Boehm <[email protected]>
Authored: Fri Jan 19 20:07:44 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Fri Jan 19 20:07:44 2018 -0800

----------------------------------------------------------------------
 .../datagen/genRandData4LinearRegression.dml    |   4 +-
 .../datagen/genRandData4LogisticRegression.dml  |  20 +-
 .../functions/codegenalg/AlgorithmDatagen.java  | 199 +++++++++++++++++++
 .../functions/codegenalg/ZPackageSuite.java     |   1 +
 4 files changed, 212 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/scripts/datagen/genRandData4LinearRegression.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LinearRegression.dml b/scripts/datagen/genRandData4LinearRegression.dml
index 2ef707f..ebce4f3 100644
--- a/scripts/datagen/genRandData4LinearRegression.dml
+++ b/scripts/datagen/genRandData4LinearRegression.dml
@@ -47,9 +47,9 @@ X = X * maxFeatureValue
 w = w * maxWeight
 Y = X %*% w
 
-if(b!=0) {
+if( b != 0 ) {
        b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
-       w =  t(cbind(t(w), b_mat))
+       w =  rbind(w, t(b_mat))
        Y = Y + b
 }
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/scripts/datagen/genRandData4LogisticRegression.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LogisticRegression.dml b/scripts/datagen/genRandData4LogisticRegression.dml
index fa2bc68..f085093 100644
--- a/scripts/datagen/genRandData4LogisticRegression.dml
+++ b/scripts/datagen/genRandData4LogisticRegression.dml
@@ -47,25 +47,25 @@ X = X * maxFeatureValue
 w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
 w = w * maxWeight
 
-ot=X%*%w
-if(b!=0) {
+ot = X %*% w
+if( b != 0) {
        b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
-       w =  t(cbind(t(w), b_mat))
+       w =  rbind(w, t(b_mat))
        ot = ot + b
 }
 
-prob = 1/(1+exp(-ot))
-if(addNoise == 1){
+prob = 1 / (1 + exp(-ot))
+if( addNoise == 1 ){
        r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-}else{
+} 
+else {
        print("this data generator generates the same dataset for both noise=0 
and noise=1")
        r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-       #r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
 }
+
 Y = 1 - 2 * (prob < r)
-if( $12 == 1 ) {
-  Y = (Y+3)/2;
-}
+if( $12 == 1 )
+  Y = (Y + 3) / 2
 
 write(w, $5, format=$11)
 write(X, $6, format=$11)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java b/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java
new file mode 100644
index 0000000..d006276
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegenalg/AlgorithmDatagen.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegenalg;
+
+import java.io.File;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+/**
+ * Codegen algorithm tests for the linear and logistic regression data
+ * generation scripts. Every test runs one of the datagen DML scripts with
+ * the codegen test configuration and asserts that the reported heavy
+ * hitters contain the substring "spoof" (or "sp_spoof").
+ */
+public class AlgorithmDatagen extends AutomatedTestBase
+{
+       private final static String TEST_NAME1 = "Algorithm_Datagen";
+       private final static String TEST_DIR = "functions/codegenalg/";
+       private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmDatagen.class.getSimpleName() + "/";
+       private final static String TEST_CONF = "SystemML-config-codegen.xml";
+       private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF);
+
+       private final static int rows = 2468;
+       private final static int cols = 200;
+
+       private final static double sparsity1 = 0.9; //dense
+       private final static double sparsity2 = 0.1; //sparse
+
+       /** Selects which datagen script a test executes. */
+       public enum DatagenType {
+               LINREG,
+               LOGREG,
+       }
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "X","Y","w" }));
+       }
+
+       @Test
+       public void testDatagenLinregDenseRewritesCP() {
+               runDatagenTest(DatagenType.LINREG, false, true, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLinregSparseRewritesCP() {
+               runDatagenTest(DatagenType.LINREG, true, true, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLinregDenseNoRewritesCP() {
+               runDatagenTest(DatagenType.LINREG, false, false, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLinregSparseNoRewritesCP() {
+               runDatagenTest(DatagenType.LINREG, true, false, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLogregDenseRewritesCP() {
+               runDatagenTest(DatagenType.LOGREG, false, true, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLogregSparseRewritesCP() {
+               runDatagenTest(DatagenType.LOGREG, true, true, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLogregDenseNoRewritesCP() {
+               runDatagenTest(DatagenType.LOGREG, false, false, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLogregSparseNoRewritesCP() {
+               runDatagenTest(DatagenType.LOGREG, true, false, ExecType.CP);
+       }
+
+       @Test
+       public void testDatagenLinregDenseRewritesSP() {
+               runDatagenTest(DatagenType.LINREG, false, true, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLinregSparseRewritesSP() {
+               runDatagenTest(DatagenType.LINREG, true, true, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLinregDenseNoRewritesSP() {
+               runDatagenTest(DatagenType.LINREG, false, false, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLinregSparseNoRewritesSP() {
+               runDatagenTest(DatagenType.LINREG, true, false, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLogregDenseRewritesSP() {
+               runDatagenTest(DatagenType.LOGREG, false, true, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLogregSparseRewritesSP() {
+               runDatagenTest(DatagenType.LOGREG, true, true, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLogregDenseNoRewritesSP() {
+               runDatagenTest(DatagenType.LOGREG, false, false, ExecType.SPARK);
+       }
+
+       @Test
+       public void testDatagenLogregSparseNoRewritesSP() {
+               runDatagenTest(DatagenType.LOGREG, true, false, ExecType.SPARK);
+       }
+
+       /**
+        * Runs one datagen script under the given settings and checks the heavy hitters.
+        *
+        * @param type     datagen script to execute (linear or logistic regression)
+        * @param sparse   if true use sparsity 0.1, otherwise sparsity 0.9
+        * @param rewrites value assigned to OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION for the run
+        * @param instType SPARK selects RUNTIME_PLATFORM.SPARK; any other value selects HYBRID_SPARK
+        */
+       private void runDatagenTest( DatagenType type, boolean sparse, boolean rewrites, ExecType instType )
+       {
+               //remember the global settings modified below so finally can restore them
+               boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+               RUNTIME_PLATFORM platformOld = rtplatform;
+               boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+
+               switch( instType ){
+                       case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+                       default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+               }
+
+               if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+                       DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+               OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+
+               try
+               {
+                       TestConfiguration config = getTestConfiguration(TEST_NAME1);
+                       loadTestConfiguration(config);
+
+                       double sparsity = sparse ? sparsity2 : sparsity1;
+
+                       //output names match the ones declared in setUp ("X","Y","w");
+                       //the scripts write w to $5 and X to $6 using the format given as $11
+                       if( type == DatagenType.LINREG ) {
+                               fullDMLScriptName = "scripts/datagen/genRandData4LinearRegression.dml";
+                               programArgs = new String[]{ "-explain", "-stats", "-args",
+                                       String.valueOf(rows), String.valueOf(cols), "10", "1", output("w"),
+                                       output("X"), output("Y"), "1", "1", String.valueOf(sparsity), "binary"};
+                       }
+                       else { //LOGREG
+                               //the trailing "1" is $12, which makes the script transform Y via (Y + 3) / 2
+                               fullDMLScriptName = "scripts/datagen/genRandData4LogisticRegression.dml";
+                               programArgs = new String[]{ "-explain", "-stats", "-args",
+                                       String.valueOf(rows), String.valueOf(cols), "10", "1", output("w"),
+                                       output("X"), output("Y"), "1", "1", String.valueOf(sparsity), "binary", "1"};
+                       }
+
+                       runTest(true, false, null, -1);
+
+                       Assert.assertTrue(heavyHittersContainsSubString("spoof")
+                               || heavyHittersContainsSubString("sp_spoof"));
+               }
+               finally {
+                       //restore exactly the settings this method changed; other optimizer
+                       //flags are left untouched because they were never modified here
+                       rtplatform = platformOld;
+                       DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+                       OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+               }
+       }
+
+       /**
+        * Override default configuration with custom test configuration to ensure
+        * scratch space and local temporary directory locations are also updated.
+        */
+       @Override
+       protected File getConfigTemplateFile() {
+               // Instrumentation in this test's output log to show custom configuration file used for template.
+               System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath());
+               return TEST_CONF_FILE;
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d5fd4230/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java
index 3159928..ca45a3c 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegenalg/ZPackageSuite.java
@@ -28,6 +28,7 @@ import org.junit.runners.Suite;
 @Suite.SuiteClasses({
        AlgorithmARIMA.class,
        AlgorithmAutoEncoder.class,
+       AlgorithmDatagen.class,
        AlgorithmGLM.class,
        AlgorithmKMeans.class,
        AlgorithmL2SVM.class,

Reply via email to