[SYSTEMML-556] JMLC support for frame column names, incl jmlc test As a preparation step for modifying transform to work over frames, we need to expose frame column names through the jmlc api and carry this information as part of frames. Furthermore, this patch also includes an initial jmlc transform test (which remains disabled until the full rework of transform has happened).
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/82b51425 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/82b51425 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/82b51425 Branch: refs/heads/master Commit: 82b51425e8583b7d624da3cb498859652f1fbf8a Parents: 5a2d888 Author: Matthias Boehm <[email protected]> Authored: Fri Mar 11 00:52:34 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Fri Mar 11 00:52:34 2016 -0800 ---------------------------------------------------------------------- .../apache/sysml/api/jmlc/PreparedScript.java | 10 + .../sysml/runtime/matrix/data/FrameBlock.java | 57 +++++- .../sysml/runtime/util/DataConverter.java | 16 ++ .../functions/jmlc/FrameTransformTest.java | 188 +++++++++++++++++++ src/test/scripts/functions/jmlc/transform.dml | 31 +++ 5 files changed, 294 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java index 3bc4fc3..2af80fb 100644 --- a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java +++ b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java @@ -198,6 +198,11 @@ public class PreparedScript } /** Binds a frame object to a registered input variable. */ + public void setFrame(String varname, String[][] frame, List<ValueType> schema, List<String> colnames) throws DMLException { + setFrame(varname, frame, schema, colnames, false); + } + + /** Binds a frame object to a registered input variable. 
*/ public void setFrame(String varname, String[][] frame, boolean reuse) throws DMLException { setFrame(varname, DataConverter.convertToFrameBlock(frame), reuse); } @@ -207,6 +212,11 @@ public class PreparedScript setFrame(varname, DataConverter.convertToFrameBlock(frame, schema), reuse); } + /** Binds a frame object to a registered input variable. */ + public void setFrame(String varname, String[][] frame, List<ValueType> schema, List<String> colnames, boolean reuse) throws DMLException { + setFrame(varname, DataConverter.convertToFrameBlock(frame, schema, colnames), reuse); + } + /** * Binds a frame object to a registered input variable. * If reuse requested, then the input is guaranteed to be http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java index d08376e..34078f7 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java @@ -48,11 +48,15 @@ public class FrameBlock implements Writable, Externalizable /** The schema of the data frame as an ordered list of value types */ private List<ValueType> _schema = null; + /** The column names of the data frame as an ordered list of strings */ + private List<String> _colnames = null; + /** The data frame data as an ordered list of columns */ private List<Array> _coldata = null; public FrameBlock() { _schema = new ArrayList<ValueType>(); + _colnames = new ArrayList<String>(); _coldata = new ArrayList<Array>(); } @@ -60,9 +64,18 @@ public class FrameBlock implements Writable, Externalizable this(schema, new String[0][]); } + public FrameBlock(List<ValueType> schema, List<String> names) { + this(schema, names, new String[0][]); + } 
+ public FrameBlock(List<ValueType> schema, String[][] data) { + this(schema, createColNames(schema.size()), data); + } + + public FrameBlock(List<ValueType> schema, List<String> names, String[][] data) { _numRows = data.length; _schema = new ArrayList<ValueType>(schema); + _colnames = new ArrayList<String>(names); _coldata = new ArrayList<Array>(); for( int i=0; i<data.length; i++ ) appendRow(data[i]); @@ -97,6 +110,15 @@ public class FrameBlock implements Writable, Externalizable } /** + * Returns the column names of the frame block. + * + * @return + */ + public List<String> getColumnNames() { + return _colnames; + } + + /** * Allocate column data structures if necessary, i.e., if schema specified * but not all column data structures created yet. */ @@ -127,6 +149,18 @@ public class FrameBlock implements Writable, Externalizable throw new RuntimeException("Mismatch in number of rows: "+newlen+" (expected: "+_numRows+")"); } + /** + * + * @param size + * @return + */ + private static List<String> createColNames(int size) { + ArrayList<String> ret = new ArrayList<String>(size); + for( int i=1; i<=size; i++ ) + ret.add("C"+i); + return ret; + } + /////// // basic get and set functionality @@ -266,6 +300,7 @@ public class FrameBlock implements Writable, Externalizable //write columns (value type, data) for( int j=0; j<getNumColumns(); j++ ) { out.writeByte(_schema.get(j).ordinal()); + out.writeUTF(_colnames.get(j)); _coldata.get(j).write(out); } } @@ -280,6 +315,7 @@ public class FrameBlock implements Writable, Externalizable _coldata.clear(); for( int j=0; j<numCols; j++ ) { ValueType vt = ValueType.values()[in.readByte()]; + String name = in.readUTF(); Array arr = null; switch( vt ) { case STRING: arr = new StringArray(new String[_numRows]); break; @@ -290,6 +326,7 @@ public class FrameBlock implements Writable, Externalizable } arr.readFields(in); _schema.add(vt); + _colnames.add(name); _coldata.add(arr); } } @@ -337,10 +374,10 @@ public class FrameBlock 
implements Writable, Externalizable //allocate output frame (incl deep copy schema) if( ret == null ) - ret = new FrameBlock(_schema); - else - ret._schema = new ArrayList<ValueType>(_schema); + ret = new FrameBlock(); ret._numRows = _numRows; + ret._schema = new ArrayList<ValueType>(_schema); + ret._colnames = new ArrayList<String>(_colnames); //copy data to output and partial overwrite w/ rhs for( int j=0; j<getNumColumns(); j++ ) { @@ -379,9 +416,11 @@ public class FrameBlock implements Writable, Externalizable ret = new FrameBlock(); ret._numRows = ru-rl+1; - //copy output schema - for( int j=cl; j<=cu; j++ ) + //copy output schema and colnames + for( int j=cl; j<=cu; j++ ) { ret._schema.add(_schema.get(j)); + ret._colnames.add(_colnames.get(j)); + } //copy output data for( int j=cl; j<=cu; j++ ) @@ -420,6 +459,8 @@ public class FrameBlock implements Writable, Externalizable //concatenate schemas (w/ deep copy to prevent side effects) ret._schema = new ArrayList<ValueType>(_schema); ret._schema.addAll(that._schema); + ret._colnames = new ArrayList<String>(_colnames); + ret._colnames.addAll(that._colnames); //concatenate column data (w/ deep copy to prevent side effects) for( Array tmp : _coldata ) @@ -437,10 +478,10 @@ public class FrameBlock implements Writable, Externalizable //allocate output frame (incl deep copy schema) if( ret == null ) - ret = new FrameBlock(_schema); - else - ret._schema = new ArrayList<ValueType>(_schema); + ret = new FrameBlock(); ret._numRows = _numRows; + ret._schema = new ArrayList<ValueType>(_schema); + ret._colnames = new ArrayList<String>(_colnames); //concatenate data (deep copy first, append second) for( Array tmp : _coldata ) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/main/java/org/apache/sysml/runtime/util/DataConverter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index 131bee6..2b642db 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -700,6 +700,22 @@ public class DataConverter } /** + * + * @param data + * @param schema + * @param colnames + * @return + */ + public static FrameBlock convertToFrameBlock(String[][] data, List<ValueType> schema, List<String> colnames) { + //check for empty frame block + if( data == null || data.length==0 ) + return new FrameBlock(); + + //create frame block + return new FrameBlock(schema, colnames, data); + } + + /** * Converts a matrix block into a frame block of value type double. * * @param mb http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java new file mode 100644 index 0000000..69950a3 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameTransformTest.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.jmlc; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +import org.junit.Assert; +import org.apache.sysml.api.DMLException; +import org.apache.sysml.api.jmlc.Connection; +import org.apache.sysml.api.jmlc.PreparedScript; +import org.apache.sysml.api.jmlc.ResultVariables; +import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; + +/** + * + * + */ +public class FrameTransformTest extends AutomatedTestBase +{ + private final static String TEST_NAME1 = "transform"; + private final static String TEST_DIR = "functions/jmlc/"; + private final static String TEST_CLASS_DIR = TEST_DIR + FrameTransformTest.class.getSimpleName() + "/"; + + private final static int rows = 700; + private final static int cols = 3; + + private final static int nRuns = 10; + + private final static double sparsity1 = 0.7; + private final static double sparsity2 = 0.1; + + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "Y" }) ); + } + + /* + @Test + public void testJMLCTransformDense() throws IOException { + runJMLCReuseTest(TEST_NAME1, false, false); + } + + @Test + public void testJMLCTransformSparse() throws IOException { + runJMLCReuseTest(TEST_NAME1, true, false); + } + + @Test + public void 
testJMLCTransformDenseReuse() throws IOException { + runJMLCReuseTest(TEST_NAME1, false, true); + } + + @Test + public void testJMLCTransformSparseReuse() throws IOException { + runJMLCReuseTest(TEST_NAME1, true, true); + } + */ + + /** + * + * @param sparseM1 + * @param sparseM2 + * @param instType + * @throws IOException + */ + @SuppressWarnings("unused") + private void runJMLCReuseTest( String testname, boolean sparse, boolean modelReuse ) + throws IOException + { + String TEST_NAME = testname; + + TestConfiguration config = getTestConfiguration(TEST_NAME); + loadTestConfiguration(config); + + //generate inputs + double[][] Xd = TestUtils.round(getRandomMatrix(rows, cols, 0.51, 7.49, sparse?sparsity2:sparsity1, 1234)); + String[][] Xs = createFrameData(Xd); + + //run DML via JMLC + ArrayList<double[][]> Yset = execDMLScriptviaJMLC( TEST_NAME, Xs, modelReuse ); + + //check non-empty y + for( double[][] data : Yset ) + Assert.assertEquals("Wrong result: "+data[0][0]+".", new Double(7), new Double(data[0][0])); + } + + /** + * + * @param X + * @return + * @throws DMLException + * @throws IOException + */ + private ArrayList<double[][]> execDMLScriptviaJMLC( String testname, String[][] X, boolean modelReuse) + throws IOException + { + Timing time = new Timing(true); + + ArrayList<double[][]> ret = new ArrayList<double[][]>(); + + //establish connection to SystemML + Connection conn = new Connection(); + + try + { + //prepare input arguments + HashMap<String,String> args = new HashMap<String,String>(); + args.put("$TRANSFORM_PATH", SCRIPT_DIR + TEST_DIR + "/tfmtd"); + args.put("$TRANSFORM_SPEC", "{ \"ids\": true ,\"recode\": [ 1, 2, 3] }"); + + //read and precompile script + String script = conn.readScript(SCRIPT_DIR + TEST_DIR + testname + ".dml"); + PreparedScript pstmt = conn.prepareScript(script, args, new String[]{"X"}, new String[]{"Y"}, false); + + if( modelReuse ) + pstmt.setFrame("X", X); + + //execute script multiple times + for( int i=0; i<nRuns; i++ ) + { 
+ //bind input parameters + if( !modelReuse ) + pstmt.setFrame("X", X); + + //execute script + ResultVariables rs = pstmt.executeScript(); + + //get output parameter + double[][] Y = rs.getMatrix("Y"); + ret.add(Y); //keep result for comparison + } + } + catch(Exception ex) + { + ex.printStackTrace(); + throw new IOException(ex); + } + finally + { + if( conn != null ) + conn.close(); + } + + System.out.println("JMLC scoring w/ "+nRuns+" runs in "+time.stop()+"ms."); + + return ret; + } + + /** + * + * @param data + * @return + */ + private String[][] createFrameData(double[][] data) { + String[][] ret = new String[data.length][]; + for( int i=0; i<data.length; i++ ) { + String[] row = new String[data[i].length]; + for( int j=0; j<data[i].length; j++ ) + row[j] = "V"+String.valueOf(data[i][j]); + ret[i] = row; + } + + return ret; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/82b51425/src/test/scripts/functions/jmlc/transform.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/jmlc/transform.dml b/src/test/scripts/functions/jmlc/transform.dml new file mode 100644 index 0000000..1fce0bb --- /dev/null +++ b/src/test/scripts/functions/jmlc/transform.dml @@ -0,0 +1,31 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +X = read($X, data_type="frame", format="csv"); +specJson = $TRANSFORM_SPEC + +Xt = transform(target=X, transformPath=$TRANSFORM_PATH, spec=specJson); + +V = matrix(Xt, rows=nrow(Xt)*ncol(Xt), cols=1); +Y = as.matrix(sum(table(V, 1) != 0)) + +write(Y, $Y); +
