Repository: incubator-systemml
Updated Branches:
  refs/heads/master 638d213a0 -> e77205d9c


[SYSTEMML-1022][SYSTEMML-1035] Update spark version and unit tests

This patch updates the Spark build version from 1.4.1 to 1.6.0 and fixes
MLContextFrameTest by adding a csvToRowRDD helper to FrameRDDConverterUtils.
It also changes the ID column to double in MLContextTest,
DataFrameVectorFrameConversionTest, and DataFrameVectorScriptTest.

Closes #269.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e77205d9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e77205d9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e77205d9

Branch: refs/heads/master
Commit: e77205d9c3eee943f32a34dadd8014696a40df63
Parents: 638d213
Author: Glenn Weidner <gweid...@us.ibm.com>
Authored: Tue Nov 15 12:35:36 2016 -0800
Committer: Glenn Weidner <gweid...@us.ibm.com>
Committed: Tue Nov 15 12:35:36 2016 -0800

----------------------------------------------------------------------
 pom.xml                                         |  2 +-
 .../spark/utils/FrameRDDConverterUtils.java     |  9 +++++
 .../DataFrameVectorFrameConversionTest.java     |  2 +-
 .../mlcontext/DataFrameVectorScriptTest.java    |  2 +-
 .../mlcontext/MLContextFrameTest.java           | 37 +++++++++++---------
 .../integration/mlcontext/MLContextTest.java    | 20 +++++------
 6 files changed, 42 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e77205d9/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 733b866..7844944 100644
--- a/pom.xml
+++ b/pom.xml
@@ -65,7 +65,7 @@
        <properties>
                <hadoop.version>2.4.1</hadoop.version>
                <antlr.version>4.5.3</antlr.version>
-               <spark.version>1.4.1</spark.version>
+               <spark.version>1.6.0</spark.version>
                <scala.version>2.10.5</scala.version>
                <scala.binary.version>2.10</scala.binary.version>
                <scala.test.version>2.2.6</scala.test.version>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e77205d9/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
index f89117b..b7d1c3d 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
@@ -508,6 +508,15 @@ public class FrameRDDConverterUtils
                JavaRDD<String> dataRdd = sc.textFile(fnameIn);
                return dataRdd.map(new RowGenerator(schema, delim));
        }
+       
+       /* 
+        * It will return JavaRDD<Row> based on csv data.
+        */
+       public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, JavaRDD<String> dataRdd, String delim, ValueType[] schema)
+       {
+               // Convert each line to a java rdd.
+               return dataRdd.map(new RowGenerator(schema, delim));
+       }
 
        /* 
         * Row Generator class based on individual line in CSV file.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e77205d9/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java
index 20287c7..8e5933c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorFrameConversionTest.java
@@ -315,7 +315,7 @@ public class DataFrameVectorFrameConversionTest extends AutomatedTestBase
                for( int i=0; i<mb.getNumRows(); i++ ) {
                        Object[] row = new Object[clen];
                        if( containsID )
-                               row[0] = i+1;
+                               row[0] = (double)i+1;
                        for( int j=0, j2=0; j<mb.getNumColumns(); j++, j2++ ) {
                                if( schema[j2] != ValueType.OBJECT ) {
                                        row[j2+off] = UtilFunctions

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e77205d9/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java
index 6720744..2a53fb4 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/mlcontext/DataFrameVectorScriptTest.java
@@ -336,7 +336,7 @@ public class DataFrameVectorScriptTest extends AutomatedTestBase
                for( int i=0; i<mb.getNumRows(); i++ ) {
                        Object[] row = new Object[clen];
                        if( containsID )
-                               row[0] = i+1;
+                               row[0] = (double)i+1;
                        for( int j=0, j2=0; j<mb.getNumColumns(); j++, j2++ ) {
                                if( schema[j2] != ValueType.OBJECT ) {
                                        row[j2+off] = UtilFunctions

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e77205d9/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
index b536034..4dc2028 100644
--- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
@@ -52,7 +52,8 @@ import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import 
org.apache.sysml.runtime.instructions.spark.utils.FrameRDDConverterUtils;
 import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
 import org.apache.sysml.test.integration.AutomatedTestBase;
-import 
org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToRow;
+import 
org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow;
+
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Assert;
@@ -76,6 +77,7 @@ public class MLContextFrameTest extends AutomatedTestBase {
        private static SparkConf conf;
        private static JavaSparkContext sc;
        private static MLContext ml;
+       private static String CSV_DELIM = ",";
 
        @BeforeClass
        public static void setUpClass() {
@@ -233,8 +235,8 @@ public class MLContextFrameTest extends AutomatedTestBase {
                        JavaRDD<String> javaRDDB = sc.parallelize(listB);
 
                        if (inputType == IO_TYPE.DATAFRAME) {
-                               JavaRDD<Row> javaRddRowA = javaRDDA.map(new 
MLContextTest.CommaSeparatedValueStringToRow());
-                               JavaRDD<Row> javaRddRowB = javaRDDB.map(new 
MLContextTest.CommaSeparatedValueStringToRow());
+                               JavaRDD<Row> javaRddRowA = 
FrameRDDConverterUtils.csvToRowRDD(sc, javaRDDA, CSV_DELIM, schemaA);
+                               JavaRDD<Row> javaRddRowB = 
FrameRDDConverterUtils.csvToRowRDD(sc, javaRDDB, CSV_DELIM, schemaB);
 
                                // Create DataFrame
                                SQLContext sqlContext = new SQLContext(sc);
@@ -401,16 +403,16 @@ public class MLContextFrameTest extends AutomatedTestBase 
{
                        List<Row> listAOut = dataFrameA.collectAsList();
 
                        Row row1 = listAOut.get(0);
-                       Assert.assertEquals("Mistmatch with expected value", 
Long.valueOf(1), row1.get(0));
-                       Assert.assertEquals("Mistmatch with expected value", 
"Str2", row1.get(1));
-                       Assert.assertEquals("Mistmatch with expected value", 
3.0, row1.get(2));
-                       Assert.assertEquals("Mistmatch with expected value", 
true, row1.get(3));
+                       Assert.assertEquals("Mismatch with expected value", 
Long.valueOf(1), row1.get(0));
+                       Assert.assertEquals("Mismatch with expected value", 
"Str2", row1.get(1));
+                       Assert.assertEquals("Mismatch with expected value", 
3.0, row1.get(2));
+                       Assert.assertEquals("Mismatch with expected value", 
true, row1.get(3));
                        
                        Row row2 = listAOut.get(1);
-                       Assert.assertEquals("Mistmatch with expected value", 
Long.valueOf(4), row2.get(0));
-                       Assert.assertEquals("Mistmatch with expected value", 
"Str12", row2.get(1));
-                       Assert.assertEquals("Mistmatch with expected value", 
13.0, row2.get(2));
-                       Assert.assertEquals("Mistmatch with expected value", 
true, row2.get(3));
+                       Assert.assertEquals("Mismatch with expected value", 
Long.valueOf(4), row2.get(0));
+                       Assert.assertEquals("Mismatch with expected value", 
"Str12", row2.get(1));
+                       Assert.assertEquals("Mismatch with expected value", 
13.0, row2.get(2));
+                       Assert.assertEquals("Mismatch with expected value", 
true, row2.get(3));
 
                        DataFrame dataFrameC = 
mlResults.getDataFrame("C").drop(RDDConverterUtils.DF_ID_COLUMN);
                        StructType dfschemaC = dataFrameC.schema(); 
@@ -422,12 +424,12 @@ public class MLContextFrameTest extends AutomatedTestBase 
{
                        List<Row> listCOut = dataFrameC.collectAsList();
 
                        Row row3 = listCOut.get(0);
-                       Assert.assertEquals("Mistmatch with expected value", 
"Str12", row3.get(0));
-                       Assert.assertEquals("Mistmatch with expected value", 
13.0, row3.get(1));
+                       Assert.assertEquals("Mismatch with expected value", 
"Str12", row3.get(0));
+                       Assert.assertEquals("Mismatch with expected value", 
13.0, row3.get(1));
 
                        Row row4 = listCOut.get(1);
-                       Assert.assertEquals("Mistmatch with expected value", 
"Str25", row4.get(0));
-                       Assert.assertEquals("Mistmatch with expected value", 
26.0, row4.get(1));
+                       Assert.assertEquals("Mismatch with expected value", 
"Str25", row4.get(0));
+                       Assert.assertEquals("Mismatch with expected value", 
26.0, row4.get(1));
                } else {
                        String[][] frameA = 
mlResults.getFrameAs2DStringArray("A");
                        Assert.assertEquals("Str2", frameA[0][1]);
@@ -481,14 +483,15 @@ public class MLContextFrameTest extends AutomatedTestBase 
{
                dataA.add("Test2,5.0");
                dataA.add("Test3,6.0");
                JavaRDD<String> javaRddStringA = sc.parallelize(dataA);
+               ValueType[] schema = { ValueType.STRING, ValueType.DOUBLE };
 
                List<String> dataB = new ArrayList<String>();
                dataB.add("1.0");
                dataB.add("2.0");
                JavaRDD<String> javaRddStringB = sc.parallelize(dataB);
 
-               JavaRDD<Row> javaRddRowA = javaRddStringA.map(new 
CommaSeparatedValueStringToRow());
-               JavaRDD<Row> javaRddRowB = javaRddStringB.map(new 
CommaSeparatedValueStringToRow());
+               JavaRDD<Row> javaRddRowA = 
FrameRDDConverterUtils.csvToRowRDD(sc, javaRddStringA, CSV_DELIM, schema);
+               JavaRDD<Row> javaRddRowB = javaRddStringB.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
 
                SQLContext sqlContext = new SQLContext(sc);
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e77205d9/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
index acf888a..dd343d4 100644
--- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
@@ -567,7 +567,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddString.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C2", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C3", 
DataTypes.DoubleType, true));
@@ -594,7 +594,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddString.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C2", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C3", 
DataTypes.DoubleType, true));
@@ -621,7 +621,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddString.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C2", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C3", 
DataTypes.DoubleType, true));
@@ -648,7 +648,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddString.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C2", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C3", 
DataTypes.DoubleType, true));
@@ -675,7 +675,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddTuple.map(new 
DoubleVectorRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", new VectorUDT(), 
true));
                StructType schema = DataTypes.createStructType(fields);
                DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, 
schema);
@@ -700,7 +700,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddTuple.map(new 
DoubleVectorRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", new VectorUDT(), 
true));
                StructType schema = DataTypes.createStructType(fields);
                DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, 
schema);
@@ -2119,7 +2119,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddString.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C2", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C3", 
DataTypes.DoubleType, true));
@@ -2144,7 +2144,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddString.map(new 
CommaSeparatedValueStringToDoubleArrayRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C2", 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C3", 
DataTypes.DoubleType, true));
@@ -2169,7 +2169,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddTuple.map(new 
DoubleVectorRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", new VectorUDT(), 
true));
                StructType schema = DataTypes.createStructType(fields);
                DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, 
schema);
@@ -2192,7 +2192,7 @@ public class MLContextTest extends AutomatedTestBase {
                JavaRDD<Row> javaRddRow = javaRddTuple.map(new 
DoubleVectorRow());
                SQLContext sqlContext = new SQLContext(sc);
                List<StructField> fields = new ArrayList<StructField>();
-               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.IntegerType, true));
+               
fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, 
DataTypes.DoubleType, true));
                fields.add(DataTypes.createStructField("C1", new VectorUDT(), 
true));
                StructType schema = DataTypes.createStructType(fields);
                DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, 
schema);

Reply via email to