[SYSTEMML-556] JMLC support for frame column names, incl jmlc test

As a preparation step for modifying transform to work over frames, we
need to expose frame column names through the jmlc api and carry this
information as part of frames. Furthermore, this patch also includes an
initial jmlc transform test (which remains disabled until the full rework
of transform is complete).

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/82b51425
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/82b51425
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/82b51425

Branch: refs/heads/master
Commit: 82b51425e8583b7d624da3cb498859652f1fbf8a
Parents: 5a2d888
Author: Matthias Boehm <[email protected]>
Authored: Fri Mar 11 00:52:34 2016 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Fri Mar 11 00:52:34 2016 -0800

----------------------------------------------------------------------
 .../apache/sysml/api/jmlc/PreparedScript.java   |  10 +
 .../sysml/runtime/matrix/data/FrameBlock.java   |  57 +++++-
 .../sysml/runtime/util/DataConverter.java       |  16 ++
 .../functions/jmlc/FrameTransformTest.java      | 188 +++++++++++++++++++
 src/test/scripts/functions/jmlc/transform.dml   |  31 +++
 5 files changed, 294 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java 
b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
index 3bc4fc3..2af80fb 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
@@ -198,6 +198,11 @@ public class PreparedScript
        }
        
        /** Binds a frame object to a registered input variable. */
+       public void setFrame(String varname, String[][] frame, List<ValueType> 
schema, List<String> colnames) throws DMLException {
+               setFrame(varname, frame, schema, colnames, false);
+       }
+       
+       /** Binds a frame object to a registered input variable. */
        public void setFrame(String varname, String[][] frame, boolean reuse) 
throws DMLException {
                setFrame(varname, DataConverter.convertToFrameBlock(frame), 
reuse);
        }
@@ -207,6 +212,11 @@ public class PreparedScript
                setFrame(varname, DataConverter.convertToFrameBlock(frame, 
schema), reuse);
        }
        
+       /** Binds a frame object to a registered input variable. */
+       public void setFrame(String varname, String[][] frame, List<ValueType> 
schema, List<String> colnames, boolean reuse) throws DMLException {
+               setFrame(varname, DataConverter.convertToFrameBlock(frame, 
schema, colnames), reuse);
+       }
+       
        /**
         * Binds a frame object to a registered input variable. 
         * If reuse requested, then the input is guaranteed to be 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index d08376e..34078f7 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -48,11 +48,15 @@ public class FrameBlock implements Writable, Externalizable
        /** The schema of the data frame as an ordered list of value types */
        private List<ValueType> _schema = null; 
        
+       /** The column names of the data frame as an ordered list of strings */
+       private List<String> _colnames = null;
+       
        /** The data frame data as an ordered list of columns */
        private List<Array> _coldata = null;
        
        public FrameBlock() {
                _schema = new ArrayList<ValueType>();
+               _colnames = new ArrayList<String>();
                _coldata = new ArrayList<Array>();
        }
        
@@ -60,9 +64,18 @@ public class FrameBlock implements Writable, Externalizable
                this(schema, new String[0][]);
        }
        
+       public FrameBlock(List<ValueType> schema, List<String> names) {
+               this(schema, names, new String[0][]);
+       }
+       
        public FrameBlock(List<ValueType> schema, String[][] data) {
+               this(schema, createColNames(schema.size()), data);
+       }
+       
+       public FrameBlock(List<ValueType> schema, List<String> names, 
String[][] data) {
                _numRows = data.length;
                _schema = new ArrayList<ValueType>(schema);
+               _colnames = new ArrayList<String>(names);
                _coldata = new ArrayList<Array>();
                for( int i=0; i<data.length; i++ )
                        appendRow(data[i]);
@@ -97,6 +110,15 @@ public class FrameBlock implements Writable, Externalizable
        }
        
        /**
+        * Returns the column names of the frame block.
+        * 
+        * @return
+        */
+       public List<String> getColumnNames() {
+               return _colnames;
+       }
+       
+       /**
         * Allocate column data structures if necessary, i.e., if schema 
specified
         * but not all column data structures created yet.
         */
@@ -127,6 +149,18 @@ public class FrameBlock implements Writable, Externalizable
                        throw new RuntimeException("Mismatch in number of rows: 
"+newlen+" (expected: "+_numRows+")");
        }
        
+       /**
+        * 
+        * @param size
+        * @return
+        */
+       private static List<String> createColNames(int size) {
+               ArrayList<String> ret = new ArrayList<String>(size);
+               for( int i=1; i<=size; i++ )
+                       ret.add("C"+i);
+               return ret;
+       }
+       
        ///////
        // basic get and set functionality
        
@@ -266,6 +300,7 @@ public class FrameBlock implements Writable, Externalizable
                //write columns (value type, data)
                for( int j=0; j<getNumColumns(); j++ ) {
                        out.writeByte(_schema.get(j).ordinal());
+                       out.writeUTF(_colnames.get(j));
                        _coldata.get(j).write(out);
                }
        }
@@ -280,6 +315,7 @@ public class FrameBlock implements Writable, Externalizable
                _coldata.clear();
                for( int j=0; j<numCols; j++ ) {
                        ValueType vt = ValueType.values()[in.readByte()];
+                       String name = in.readUTF();
                        Array arr = null;
                        switch( vt ) {
                                case STRING:  arr = new StringArray(new 
String[_numRows]); break;
@@ -290,6 +326,7 @@ public class FrameBlock implements Writable, Externalizable
                        }
                        arr.readFields(in);
                        _schema.add(vt);
+                       _colnames.add(name);
                        _coldata.add(arr);
                }
        }
@@ -337,10 +374,10 @@ public class FrameBlock implements Writable, 
Externalizable
                
                //allocate output frame (incl deep copy schema)
                if( ret == null )
-                       ret = new FrameBlock(_schema);
-               else
-                       ret._schema = new ArrayList<ValueType>(_schema);
+                       ret = new FrameBlock();
                ret._numRows = _numRows;
+               ret._schema = new ArrayList<ValueType>(_schema);
+               ret._colnames = new ArrayList<String>(_colnames);
                
                //copy data to output and partial overwrite w/ rhs
                for( int j=0; j<getNumColumns(); j++ ) {
@@ -379,9 +416,11 @@ public class FrameBlock implements Writable, Externalizable
                        ret = new FrameBlock();
                ret._numRows = ru-rl+1;
                
-               //copy output schema
-               for( int j=cl; j<=cu; j++ )
+               //copy output schema and colnames
+               for( int j=cl; j<=cu; j++ ) {
                        ret._schema.add(_schema.get(j));
+                       ret._colnames.add(_colnames.get(j));
+               }
                
                //copy output data
                for( int j=cl; j<=cu; j++ )
@@ -420,6 +459,8 @@ public class FrameBlock implements Writable, Externalizable
                        //concatenate schemas (w/ deep copy to prevent side 
effects)
                        ret._schema = new ArrayList<ValueType>(_schema);
                        ret._schema.addAll(that._schema);
+                       ret._colnames = new ArrayList<String>(_colnames);
+                       ret._colnames.addAll(that._colnames);
                        
                        //concatenate column data (w/ deep copy to prevent side 
effects)
                        for( Array tmp : _coldata )
@@ -437,10 +478,10 @@ public class FrameBlock implements Writable, 
Externalizable
                        
                        //allocate output frame (incl deep copy schema)
                        if( ret == null )
-                               ret = new FrameBlock(_schema);
-                       else
-                               ret._schema = new ArrayList<ValueType>(_schema);
+                               ret = new FrameBlock();
                        ret._numRows = _numRows;
+                       ret._schema = new ArrayList<ValueType>(_schema);
+                       ret._colnames = new ArrayList<String>(_colnames);
                        
                        //concatenate data (deep copy first, append second)
                        for( Array tmp : _coldata )

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index 131bee6..2b642db 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -700,6 +700,22 @@ public class DataConverter
        }
        
        /**
+        * 
+        * @param data
+        * @param schema
+        * @param colnames
+        * @return
+        */
+       public static FrameBlock convertToFrameBlock(String[][] data, 
List<ValueType> schema, List<String> colnames) {
+               //check for empty frame block 
+               if( data == null || data.length==0 )
+                       return new FrameBlock();
+               
+               //create frame block
+               return new FrameBlock(schema, colnames, data);
+       }
+       
+       /**
         * Converts a matrix block into a frame block of value type double.
         * 
         * @param mb

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java
new file mode 100644
index 0000000..69950a3
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.jmlc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.apache.sysml.api.DMLException;
+import org.apache.sysml.api.jmlc.Connection;
+import org.apache.sysml.api.jmlc.PreparedScript;
+import org.apache.sysml.api.jmlc.ResultVariables;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+/**
+ * 
+ * 
+ */
+public class FrameTransformTest extends AutomatedTestBase 
+{
+       private final static String TEST_NAME1 = "transform";
+       private final static String TEST_DIR = "functions/jmlc/";
+       private final static String TEST_CLASS_DIR = TEST_DIR + 
FrameTransformTest.class.getSimpleName() + "/";
+       
+       private final static int rows = 700;
+       private final static int cols = 3;
+       
+       private final static int nRuns = 10;
+       
+       private final static double sparsity1 = 0.7;
+       private final static double sparsity2 = 0.1;
+       
+       
+       @Override
+       public void setUp() {
+               addTestConfiguration(TEST_NAME1, new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "Y" }) ); 
+       }
+       
+       /*
+       @Test
+       public void testJMLCTransformDense() throws IOException {
+               runJMLCReuseTest(TEST_NAME1, false, false);
+       }
+       
+       @Test
+       public void testJMLCTransformSparse() throws IOException {
+               runJMLCReuseTest(TEST_NAME1, true, false);
+       }
+       
+       @Test
+       public void testJMLCTransformDenseReuse() throws IOException {
+               runJMLCReuseTest(TEST_NAME1, false, true);
+       }
+       
+       @Test
+       public void testJMLCTransformSparseReuse() throws IOException {
+               runJMLCReuseTest(TEST_NAME1, true, true);
+       }
+       */
+
+       /**
+        * 
+        * @param sparseM1
+        * @param sparseM2
+        * @param instType
+        * @throws IOException 
+        */
+       @SuppressWarnings("unused")
+       private void runJMLCReuseTest( String testname, boolean sparse, boolean 
modelReuse ) 
+               throws IOException
+       {       
+               String TEST_NAME = testname;
+               
+               TestConfiguration config = getTestConfiguration(TEST_NAME);
+               loadTestConfiguration(config);
+       
+               //generate inputs
+               double[][] Xd = TestUtils.round(getRandomMatrix(rows, cols, 
0.51, 7.49, sparse?sparsity2:sparsity1, 1234));
+               String[][] Xs = createFrameData(Xd);
+               
+               //run DML via JMLC
+               ArrayList<double[][]> Yset = execDMLScriptviaJMLC( TEST_NAME, 
Xs, modelReuse );
+               
+               //check non-empty y
+               for( double[][] data : Yset )
+                       Assert.assertEquals("Wrong result: "+data[0][0]+".", 
new Double(7), new Double(data[0][0]));
+       }
+
+       /**
+        * 
+        * @param X
+        * @return
+        * @throws DMLException
+        * @throws IOException
+        */
+       private ArrayList<double[][]> execDMLScriptviaJMLC( String testname, 
String[][] X, boolean modelReuse) 
+               throws IOException
+       {
+               Timing time = new Timing(true);
+               
+               ArrayList<double[][]> ret = new ArrayList<double[][]>();
+               
+               //establish connection to SystemML
+               Connection conn = new Connection();
+                               
+               try
+               {
+                       //prepare input arguments
+                       HashMap<String,String> args = new 
HashMap<String,String>();
+                       args.put("$TRANSFORM_PATH", SCRIPT_DIR + TEST_DIR + 
"/tfmtd");
+                       args.put("$TRANSFORM_SPEC", "{ \"ids\": true 
,\"recode\": [ 1, 2, 3] }");
+                       
+                       //read and precompile script
+                       String script = conn.readScript(SCRIPT_DIR + TEST_DIR + 
testname + ".dml");     
+                       PreparedScript pstmt = conn.prepareScript(script, args, 
new String[]{"X"}, new String[]{"Y"}, false);
+                       
+                       if( modelReuse )
+                               pstmt.setFrame("X", X);
+                       
+                       //execute script multiple times
+                       for( int i=0; i<nRuns; i++ )
+                       {
+                               //bind input parameters
+                               if( !modelReuse )
+                                       pstmt.setFrame("X", X);
+                               
+                               //execute script
+                               ResultVariables rs = pstmt.executeScript();
+                               
+                               //get output parameter
+                               double[][] Y = rs.getMatrix("Y");
+                               ret.add(Y); //keep result for comparison
+                       }
+               }
+               catch(Exception ex)
+               {
+                       ex.printStackTrace();
+                       throw new IOException(ex);
+               }
+               finally
+               {
+                       if( conn != null )
+                               conn.close();
+               }
+               
+               System.out.println("JMLC scoring w/ "+nRuns+" runs in 
"+time.stop()+"ms.");
+               
+               return ret;
+       }
+       
+       /**
+        * 
+        * @param data
+        * @return
+        */
+       private String[][] createFrameData(double[][] data) {
+               String[][] ret = new String[data.length][];
+               for( int i=0; i<data.length; i++ ) {
+                       String[] row = new String[data[i].length]; 
+                       for( int j=0; j<data[i].length; j++ )
+                               row[j] = "V"+String.valueOf(data[i][j]);
+                       ret[i] = row;
+               }
+               
+               return ret;
+       }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/test/scripts/functions/jmlc/transform.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/transform.dml 
b/src/test/scripts/functions/jmlc/transform.dml
new file mode 100644
index 0000000..1fce0bb
--- /dev/null
+++ b/src/test/scripts/functions/jmlc/transform.dml
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($X, data_type="frame", format="csv");
+specJson = $TRANSFORM_SPEC
+
+Xt = transform(target=X, transformPath=$TRANSFORM_PATH, spec=specJson);
+
+V = matrix(Xt, rows=nrow(Xt)*ncol(Xt), cols=1);
+Y = as.matrix(sum(table(V, 1) != 0))
+
+write(Y, $Y);
+

Reply via email to