Repository: incubator-systemml Updated Branches: refs/heads/master 52776144a -> 0bff338bc
[SYSTEMML-558] FrameBlock-matrix casting operations, incl tests Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0bff338b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0bff338b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0bff338b Branch: refs/heads/master Commit: 0bff338bc50ff054516a03dfcac0316c5d8e902a Parents: 5277614 Author: Matthias Boehm <[email protected]> Authored: Thu Mar 10 14:18:06 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Thu Mar 10 14:18:40 2016 -0800 ---------------------------------------------------------------------- .../sysml/runtime/util/DataConverter.java | 100 +++++++++++++ .../functions/frame/FrameCastingTest.java | 146 +++++++++++++++++++ .../functions/frame/ZPackageSuite.java | 1 + 3 files changed, 247 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0bff338b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index 00df11e..c75aabf 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -21,6 +21,7 @@ package org.apache.sysml.runtime.util; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -38,11 +39,13 @@ import org.apache.sysml.runtime.io.ReadProperties; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.data.CTableMap; import org.apache.sysml.runtime.matrix.data.FileFormatProperties; +import 
org.apache.sysml.runtime.matrix.data.FrameBlock; import org.apache.sysml.runtime.matrix.data.IJV; import org.apache.sysml.runtime.matrix.data.InputInfo; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.MatrixIndexes; import org.apache.sysml.runtime.matrix.data.OutputInfo; +import org.apache.sysml.runtime.matrix.data.SparseBlock; import org.apache.sysml.udf.Matrix; @@ -613,6 +616,103 @@ public class DataConverter } /** + * Converts a frame block with arbitrary schema into a matrix block. + * Since matrix block only supports value type double, we do a best + * effort conversion of non-double types which might result in errors + * for non-numerical data. + * + * @param frame + * @return + * @throws DMLRuntimeException + */ + public static MatrixBlock convertToMatrixBlock(FrameBlock frame) + throws DMLRuntimeException + { + MatrixBlock mb = new MatrixBlock(frame.getNumRows(), frame.getNumColumns(), false); + + List<ValueType> schema = frame.getSchema(); + for( int i=0; i<frame.getNumRows(); i++ ) + for( int j=0; j<frame.getNumColumns(); j++ ) { + mb.appendValue(i, j, UtilFunctions.objectToDouble( + schema.get(j), frame.get(i, j))); + } + mb.examSparsity(); + + return mb; + } + + /** + * Converts a matrix block into a frame block of value type double. + * + * @param mb + * @return + */ + public static FrameBlock convertToFrameBlock(MatrixBlock mb) { + return convertToFrameBlock(mb, ValueType.DOUBLE); + } + + /** + * Converts a matrix block into a frame block of a given value type. 
+ * + * @param mb + * @param vt + * @return + */ + public static FrameBlock convertToFrameBlock(MatrixBlock mb, ValueType vt) { + //construct temporary schema + List<ValueType> schema = new ArrayList<ValueType>(); + for( int j=0; j<mb.getNumColumns(); j++ ) + schema.add(vt); + + return convertToFrameBlock(mb, schema); + } + + /** + * + * @param mb + * @param schema + * @return + */ + public static FrameBlock convertToFrameBlock(MatrixBlock mb, List<ValueType> schema) + { + FrameBlock frame = new FrameBlock(schema); + Object[] row = new Object[mb.getNumColumns()]; + + if( mb.isInSparseFormat() ) //SPARSE + { + SparseBlock sblock = mb.getSparseBlock(); + for( int i=0; i<mb.getNumRows(); i++ ) { + Arrays.fill(row, null); //reset + if( !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] aval = sblock.values(i); + for( int j=apos; j<apos+alen; j++ ) { + row[aix[j]] = UtilFunctions.doubleToObject( + schema.get(aix[j]), aval[j]); + } + } + frame.appendRow(row); + } + } + else //DENSE + { + for( int i=0; i<mb.getNumRows(); i++ ) { + Arrays.fill(row, null); //reset + for( int j=0; j<mb.getNumColumns(); j++ ) { + row[j] = UtilFunctions.doubleToObject( + schema.get(j), + mb.quickGetValue(i, j)); + } + frame.appendRow(row); + } + } + + return frame; + } + + /** * * @param mb * @param colwise http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0bff338b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java new file mode 100644 index 0000000..5fe14dd --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.frame; + +import java.util.Arrays; +import java.util.List; + +import org.apache.sysml.parser.Expression.ValueType; +import org.apache.sysml.runtime.matrix.data.FrameBlock; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.util.DataConverter; +import org.apache.sysml.runtime.util.UtilFunctions; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.utils.TestUtils; +import org.junit.Assert; +import org.junit.Test; + +public class FrameCastingTest extends AutomatedTestBase +{ + private final static int rows = 2891; + private final static ValueType[] schemaStrings = new ValueType[]{ValueType.STRING, ValueType.STRING, ValueType.STRING}; + private final static ValueType[] schemaMixed = new ValueType[]{ValueType.STRING, ValueType.DOUBLE, ValueType.INT, ValueType.BOOLEAN}; + + private enum CastType { + M2F_S, + M2F_G, + F2M, + } + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + } + + @Test + public void testFrameStringsM2F_S() { + runFrameCastingTest(schemaStrings, CastType.M2F_S); + } + + @Test + public void testFrameStringsM2F_G() { + 
runFrameCastingTest(schemaStrings, CastType.M2F_G); + } + + @Test + public void testFrameStringsF2M() { + runFrameCastingTest(schemaStrings, CastType.F2M); + } + + @Test + public void testFrameMixedM2F_S() { + runFrameCastingTest(schemaMixed, CastType.M2F_S); + } + + @Test + public void testFrameMixedM2F_G() { + runFrameCastingTest(schemaMixed, CastType.M2F_G); + } + + @Test + public void testFrameMixedF2M() { + runFrameCastingTest(schemaMixed, CastType.F2M); + } + + /** + * Runs one casting scenario and checks the resulting frame's row count and cell values against the generated data. + * + * @param schema value types of the columns in the generated test data + * @param ctype casting path to run (M2F_S, M2F_G, or F2M) + */ + private void runFrameCastingTest( ValueType[] schema, CastType ctype) + { + try + { + //data generation + double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2412); + for( int i=0; i<rows; i++ ) { + for( int j=0; j<schema.length; j++ ) + A[i][j] = UtilFunctions.objectToDouble(schema[j], + UtilFunctions.doubleToObject(schema[j], A[i][j])); + } + + //core casting operations + FrameBlock frame = null; + if( ctype == CastType.F2M ) + { + //construct input schema + List<ValueType> lschema1 = Arrays.asList(schema); + FrameBlock frame1 = new FrameBlock(lschema1); + Object[] row1 = new Object[lschema1.size()]; + for( int i=0; i<rows; i++ ) { + for( int j=0; j<lschema1.size(); j++ ) + row1[j] = UtilFunctions.doubleToObject(lschema1.get(j), A[i][j]); + frame1.appendRow(row1); + } + + MatrixBlock mb = DataConverter.convertToMatrixBlock(frame1); + frame = DataConverter.convertToFrameBlock(mb); + } + else if( ctype == CastType.M2F_G ) + { + MatrixBlock mb = DataConverter.convertToMatrixBlock(A); + frame = DataConverter.convertToFrameBlock(mb); + } + else if( ctype == CastType.M2F_S ) + { + MatrixBlock mb = DataConverter.convertToMatrixBlock(A); + frame = DataConverter.convertToFrameBlock(mb, Arrays.asList(schema)); + } + + //check basic meta data + if( frame.getNumRows() != rows ) + Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+rows); + + //check correct values + List<ValueType> lschema = 
frame.getSchema(); + for( int i=0; i<rows; i++ ) + for( int j=0; j<lschema.size(); j++ ) { + double tmp = UtilFunctions.objectToDouble(lschema.get(j), frame.get(i, j)); + if( tmp != A[i][j] ) + Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]); + } + } + catch(Exception ex) { + ex.printStackTrace(); + throw new RuntimeException(ex); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0bff338b/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java index f2afef9..3a3a449 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java @@ -27,6 +27,7 @@ import org.junit.runners.Suite; @RunWith(Suite.class) @Suite.SuiteClasses({ FrameAppendTest.class, + FrameCastingTest.class, FrameGetSetTest.class, FrameIndexingTest.class, FrameSerializationTest.class,
