Repository: incubator-systemml Updated Branches: refs/heads/master cfc561eec -> 1371ab4cf
[SYSTEMML-557] New FrameObject/FrameBlock data structures, incl tests This patch introduces an initial block data structure for frames as well as the related meta data object (for cp). FrameBlock follows a column-oriented layout in order to store int/boolean/double columns in native arrays for memory efficiency. Currently, this data structure provides basic meta data handling, get/set/append functionality, row iterators, and serialization/deserialization. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/1371ab4c Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/1371ab4c Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/1371ab4c Branch: refs/heads/master Commit: 1371ab4cf64201b739865e1a38a6b9dfe18d396f Parents: cfc561e Author: Matthias Boehm <[email protected]> Authored: Wed Mar 9 20:04:09 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Wed Mar 9 20:04:09 2016 -0800 ---------------------------------------------------------------------- .../controlprogram/caching/FrameObject.java | 109 ++++ .../sysml/runtime/matrix/data/FrameBlock.java | 526 +++++++++++++++++++ .../functions/frame/FrameGetSetTest.java | 209 ++++++++ .../functions/frame/FrameSerializationTest.java | 175 ++++++ .../functions/frame/ZPackageSuite.java | 37 ++ 5 files changed, 1056 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1371ab4c/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java new file mode 100644 index 0000000..40c62c7 --- /dev/null +++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.controlprogram.caching; + +import java.util.List; + +import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.parser.Expression.ValueType; +import org.apache.sysml.runtime.matrix.data.FrameBlock; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; + +public class FrameObject extends CacheableData +{ + private static final long serialVersionUID = 1755082174281927785L; + + /** Container object that holds the actual data.*/ + private FrameBlock _data = null; + + /** The name of HDFS file in which the data is backed up. */ + private String _hdfsFileName = null; // file name and path + + protected FrameObject() { + super(DataType.FRAME, ValueType.UNKNOWN); + } + + public FrameObject(String fname, List<ValueType> schema, FrameBlock data) { + this(); + setFileName(fname); + setData(data); + } + + public void setFileName(String fname) { + _hdfsFileName = fname; + } + + public String getFileName() { + return _hdfsFileName; + } + + /** + * NOTE: temporary API until integrated into caching. 
+ * + * @param block + */ + public void setData(FrameBlock data) { + _data = data; + } + + /** + * NOTE: temporary API until integrated into caching. + * + * @return + */ + public FrameBlock getData() { + return _data; + } + + + //////////////////////////////////// + // currently unsupported caching api + + @Override + protected boolean isBlobPresent() { + throw new RuntimeException("Caching not implemented yet for FrameObject."); + } + + @Override + protected void evictBlobFromMemory(MatrixBlock mb) throws CacheIOException { + //TODO refactoring api (no dependence on matrixblock) + throw new RuntimeException("Caching not implemented yet for FrameObject."); + } + + @Override + protected void restoreBlobIntoMemory() throws CacheIOException { + throw new RuntimeException("Caching not implemented yet for FrameObject."); + } + + @Override + protected void freeEvictedBlob() { + throw new RuntimeException("Caching not implemented yet for FrameObject."); + } + + @Override + protected boolean isBelowCachingThreshold() { + throw new RuntimeException("Caching not implemented yet for FrameObject."); + } + + @Override + public String getDebugName() { + throw new RuntimeException("Caching not implemented yet for FrameObject."); + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1371ab4c/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java new file mode 100644 index 0000000..c522c39 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java @@ -0,0 +1,526 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.matrix.data; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.io.Writable; +import org.apache.sysml.parser.Expression.ValueType; + +/** + * + */ +@SuppressWarnings({"rawtypes","unchecked"}) //allow generic native arrays +public class FrameBlock implements Writable, Externalizable +{ + private static final long serialVersionUID = -3993450030207130665L; + + /** The number of rows of the FrameBlock */ + private int _numRows = -1; + + /** The schema of the data frame as an ordered list of value types */ + private List<ValueType> _schema = null; + + /** The data frame data as an ordered list of columns */ + private List<Array> _coldata = null; + + public FrameBlock() { + _schema = new ArrayList<ValueType>(); + _coldata = new ArrayList<Array>(); + } + + public FrameBlock(List<ValueType> schema) { + this(schema, new String[0][]); + } + + public FrameBlock(List<ValueType> schema, String[][] data) { + _numRows = data.length; + _schema = new ArrayList<ValueType>(schema); + _coldata = new ArrayList<Array>(); + for( int i=0; i<data.length; i++ ) + appendRow(data[i]); + } + + /** + * Get the number 
of rows of the data frame. + * + * @return + */ + public int getNumRows() { + return _numRows; + } + + /** + * Get the number of columns of the data frame, that is + * the number of columns defined in the schema. + * + * @return + */ + public int getNumColumns() { + return _schema.size(); + } + + /** + * Allocate column data structures if necessary, i.e., if schema specified + * but not all column data structures created yet. + */ + public void ensureAllocatedColumns() { + //early abort if already + if( _schema.size() == _coldata.size() ) + return; + //allocate columns if necessary + for( int j=0; j<_schema.size(); j++ ) { + if( j >= _coldata.size() ) + switch( _schema.get(j) ) { + case STRING: _coldata.add(new StringArray(new String[0])); break; + case BOOLEAN: _coldata.add(new BooleanArray(new boolean[0])); break; + case INT: _coldata.add(new LongArray(new long[0])); break; + case DOUBLE: _coldata.add(new DoubleArray(new double[0])); break; + default: throw new RuntimeException("Unsupported value type: "+_schema.get(j)); + } + } + } + + /** + * Checks for matching column sizes in case of existing columns. + * + * @param newlen + */ + public void ensureColumnCompatibility(int newlen) { + if( _coldata.size() > 0 && _numRows != newlen ) + throw new RuntimeException("Mismatch in number of rows: "+newlen+" (expected: "+_numRows+")"); + } + + /////// + // basic get and set functionality + + /** + * Gets a boxed object of the value in position (r,c). + * + * @param r row index, 0-based + * @param c column index, 0-based + * @return + */ + public Object get(int r, int c) { + return _coldata.get(c).get(r); + } + + /** + * Sets the value in position (r,c), where the input is assumed + * to be a boxed object consistent with the schema definition. 
+ * + * @param r + * @param c + * @param val + */ + public void set(int r, int c, Object val) { + _coldata.get(c).set(r, val); + } + + /** + * Append a row to the end of the data frame, where all row fields + * are boxed objects according to the schema. + * + * @param row + */ + public void appendRow(Object[] row) { + ensureAllocatedColumns(); + for( int j=0; j<row.length; j++ ) + _coldata.get(j).append(row[j]); + _numRows++; + } + + /** + * Append a row to the end of the data frame, where all row fields + * are string encoded. + * + * @param row + */ + public void appendRow(String[] row) { + ensureAllocatedColumns(); + for( int j=0; j<row.length; j++ ) + _coldata.get(j).append(row[j]); + _numRows++; + } + + /** + * Append a column of value type STRING as the last column of + * the data frame. The given array is wrapped but not copied + * and hence might be updated in the future. + * + * @param col + */ + public void appendColumn(String[] col) { + ensureColumnCompatibility(col.length); + _schema.add(ValueType.STRING); + _coldata.add(new StringArray(col)); + _numRows = col.length; + } + + /** + * Append a column of value type BOOLEAN as the last column of + * the data frame. The given array is wrapped but not copied + * and hence might be updated in the future. + * + * @param col + */ + public void appendColumn(boolean[] col) { + ensureColumnCompatibility(col.length); + _schema.add(ValueType.BOOLEAN); + _coldata.add(new BooleanArray(col)); + _numRows = col.length; + } + + /** + * Append a column of value type INT as the last column of + * the data frame. The given array is wrapped but not copied + * and hence might be updated in the future. + * + * @param col + */ + public void appendColumn(long[] col) { + ensureColumnCompatibility(col.length); + _schema.add(ValueType.INT); + _coldata.add(new LongArray(col)); + _numRows = col.length; + } + + /** + * Append a column of value type DOUBLE as the last column of + * the data frame. 
The given array is wrapped but not copied + * and hence might be updated in the future. + * + * @param col + */ + public void appendColumn(double[] col) { + ensureColumnCompatibility(col.length); + _schema.add(ValueType.DOUBLE); + _coldata.add(new DoubleArray(col)); + _numRows = col.length; + } + + /** + * Get a row iterator over the frame where all fields are encoded + * as strings independent of their value types. + * + * @return + */ + public Iterator<String[]> getStringRowIterator() { + return new StringRowIterator(); + } + + /** + * Get a row iterator over the frame where all fields are encoded + * as boxed objects according to their value types. + * + * @return + */ + public Iterator<Object[]> getObjectRowIterator() { + return new ObjectRowIterator(); + } + + /////// + // serialization / deserialization (implementation of writable and externalizable) + + @Override + public void write(DataOutput out) throws IOException { + //write header (rows, cols) + out.writeInt(getNumRows()); + out.writeInt(getNumColumns()); + //write columns (value type, data) + for( int j=0; j<getNumColumns(); j++ ) { + out.writeByte(_schema.get(j).ordinal()); + _coldata.get(j).write(out); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + //read head (rows, cols) + _numRows = in.readInt(); + int numCols = in.readInt(); + //read columns (value type, data) + _schema.clear(); + _coldata.clear(); + for( int j=0; j<numCols; j++ ) { + ValueType vt = ValueType.values()[in.readByte()]; + Array arr = null; + switch( vt ) { + case STRING: arr = new StringArray(new String[_numRows]); break; + case BOOLEAN: arr = new BooleanArray(new boolean[_numRows]); break; + case INT: arr = new LongArray(new long[_numRows]); break; + case DOUBLE: arr = new DoubleArray(new double[_numRows]); break; + default: throw new IOException("Unsupported value type: "+vt); + } + arr.readFields(in); + _schema.add(vt); + _coldata.add(arr); + } + } + + @Override + public void 
writeExternal(ObjectOutput out) throws IOException { + //redirect serialization to writable impl + write(out); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + //redirect deserialization to writable impl + readFields(in); + } + + /////// + // row iterators (over strings and boxed objects) + + /** + * + */ + private abstract class RowIterator<T> implements Iterator<T[]> { + protected T[] _curRow = null; + protected int _curPos = -1; + + protected RowIterator() { + _curPos = 0; + _curRow = createRow(getNumColumns()); + } + + @Override + public boolean hasNext() { + return (_curPos < _numRows); + } + + @Override + public void remove() { + throw new RuntimeException("RowIterator.remove is unsupported!"); + } + + protected abstract T[] createRow(int size); + } + + /** + * + */ + private class StringRowIterator extends RowIterator<String> { + @Override + protected String[] createRow(int size) { + return new String[size]; + } + + @Override + public String[] next( ) { + for( int j=0; j<getNumColumns(); j++ ) + _curRow[j] = get(_curPos, j).toString(); + _curPos++; + return _curRow; + } + } + + /** + * + */ + private class ObjectRowIterator extends RowIterator<Object> { + @Override + protected Object[] createRow(int size) { + return new Object[size]; + } + + @Override + public Object[] next( ) { + for( int j=0; j<getNumColumns(); j++ ) + _curRow[j] = get(_curPos, j); + _curPos++; + return _curRow; + } + } + + /////// + // generic, resizable native arrays + + /** + * Base class for generic, resizable array of various value types. We + * use this custom class hierarchy instead of Trove or other libraries + * in order to avoid unnecessary dependencies. 
+ */ + private abstract static class Array<T> implements Writable { + protected int _size = 0; + protected int newSize() { + return (int) Math.max(_size*2, 4); + } + public abstract T get(int index); + public abstract void set(int index, T value); + public abstract void append(String value); + public abstract void append(T value); + } + + /** + * + */ + private static class StringArray extends Array<String> { + private String[] _data = null; + + public StringArray(String[] data) { + _data = data; + _size = _data.length; + } + public String get(int index) { + return _data[index]; + } + public void set(int index, String value) { + _data[index] = value; + } + public void append(String value) { + if( _data.length <= _size ) + _data = Arrays.copyOf(_data, newSize()); + _data[_size++] = value; + } + @Override + public void write(DataOutput out) throws IOException { + for( int i=0; i<_size; i++ ) + out.writeUTF(_data[i]); + } + @Override + public void readFields(DataInput in) throws IOException { + _size = _data.length; + for( int i=0; i<_size; i++ ) + _data[i] = in.readUTF(); + } + } + + /** + * + */ + private static class BooleanArray extends Array<Boolean> { + private boolean[] _data = null; + + public BooleanArray(boolean[] data) { + _data = data; + _size = _data.length; + } + public Boolean get(int index) { + return _data[index]; + } + public void set(int index, Boolean value) { + _data[index] = value; + } + public void append(String value) { + append(Boolean.parseBoolean(value)); + } + public void append(Boolean value) { + if( _data.length <= _size ) + _data = Arrays.copyOf(_data, newSize()); + _data[_size++] = value; + } + @Override + public void write(DataOutput out) throws IOException { + for( int i=0; i<_size; i++ ) + out.writeBoolean(_data[i]); + } + @Override + public void readFields(DataInput in) throws IOException { + _size = _data.length; + for( int i=0; i<_size; i++ ) + _data[i] = in.readBoolean(); + } + } + + /** + * + */ + private static class LongArray 
extends Array<Long> { + private long[] _data = null; + + public LongArray(long[] data) { + _data = data; + _size = _data.length; + } + public Long get(int index) { + return _data[index]; + } + public void set(int index, Long value) { + _data[index] = value; + } + public void append(String value) { + append(Long.parseLong(value)); + } + public void append(Long value) { + if( _data.length <= _size ) + _data = Arrays.copyOf(_data, newSize()); + _data[_size++] = value; + } + @Override + public void write(DataOutput out) throws IOException { + for( int i=0; i<_size; i++ ) + out.writeLong(_data[i]); + } + @Override + public void readFields(DataInput in) throws IOException { + _size = _data.length; + for( int i=0; i<_size; i++ ) + _data[i] = in.readLong(); + } + } + + /** + * + */ + private static class DoubleArray extends Array<Double> { + private double[] _data = null; + + public DoubleArray(double[] data) { + _data = data; + _size = _data.length; + } + public Double get(int index) { + return _data[index]; + } + public void set(int index, Double value) { + _data[index] = value; + } + public void append(String value) { + append(Double.parseDouble(value)); + } + public void append(Double value) { + if( _data.length <= _size ) + _data = Arrays.copyOf(_data, newSize()); + _data[_size++] = value; + } + @Override + public void write(DataOutput out) throws IOException { + for( int i=0; i<_size; i++ ) + out.writeDouble(_data[i]); + } + @Override + public void readFields(DataInput in) throws IOException { + _size = _data.length; + for( int i=0; i<_size; i++ ) + _data[i] = in.readDouble(); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1371ab4c/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameGetSetTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameGetSetTest.java 
b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameGetSetTest.java new file mode 100644 index 0000000..acdb89f --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameGetSetTest.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.test.integration.functions.frame; + +import java.util.Arrays; +import java.util.List; + +import org.apache.sysml.parser.Expression.ValueType; +import org.apache.sysml.runtime.matrix.data.FrameBlock; +import org.apache.sysml.runtime.util.UtilFunctions; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.utils.TestUtils; +import org.junit.Assert; +import org.junit.Test; + +public class FrameGetSetTest extends AutomatedTestBase +{ + private final static int rows = 3254; + private final static ValueType[] schemaStrings = new ValueType[]{ValueType.STRING, ValueType.STRING, ValueType.STRING}; + private final static ValueType[] schemaMixed = new ValueType[]{ValueType.STRING, ValueType.DOUBLE, ValueType.INT, ValueType.BOOLEAN}; + + private enum InitType { + COLUMN, + ROW_OBJ, + ROW_STRING, + } + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + } + + @Test + public void testFrameStringsColumn() { + runFrameGetSetTest(schemaStrings, InitType.COLUMN); + } + + @Test + public void testFrameMixedColumn() { + runFrameGetSetTest(schemaMixed, InitType.COLUMN); + } + + @Test + public void testFrameStringsRowObj() { + runFrameGetSetTest(schemaStrings, InitType.ROW_OBJ); + } + + @Test + public void testFrameMixedRowObj() { + runFrameGetSetTest(schemaMixed, InitType.ROW_OBJ); + } + + @Test + public void testFrameStringsRowString() { + runFrameGetSetTest(schemaStrings, InitType.ROW_STRING); + } + + @Test + public void testFrameMixedRowString() { + runFrameGetSetTest(schemaMixed, InitType.ROW_STRING); + } + + + /** + * + * @param sparseM1 + * @param sparseM2 + * @param instType + */ + private void runFrameGetSetTest( ValueType[] schema, InitType itype) + { + try + { + //data generation + double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 8234); + + //init data frame + List<ValueType> lschema = Arrays.asList(schema); + FrameBlock frame = new FrameBlock(lschema); + + //init 
data frame + if( itype == InitType.COLUMN ) + { + for( int j=0; j<lschema.size(); j++ ) { + ValueType vt = lschema.get(j); + switch( vt ) { + case STRING: + String[] tmp1 = new String[rows]; + for( int i=0; i<rows; i++ ) + tmp1[i] = (String)toObject(vt, A[i][j]); + frame.appendColumn(tmp1); + break; + case BOOLEAN: + boolean[] tmp2 = new boolean[rows]; + for( int i=0; i<rows; i++ ) + A[i][j] = (tmp2[i] = (Boolean)toObject(vt, A[i][j]))?1:0; + frame.appendColumn(tmp2); + break; + case INT: + long[] tmp3 = new long[rows]; + for( int i=0; i<rows; i++ ) + A[i][j] = tmp3[i] = (Long)toObject(vt, A[i][j]); + frame.appendColumn(tmp3); + break; + case DOUBLE: + double[] tmp4 = new double[rows]; + for( int i=0; i<rows; i++ ) + tmp4[i] = (Double)toObject(vt, A[i][j]); + frame.appendColumn(tmp4); + break; + default: + throw new RuntimeException("Unsupported value type: "+vt); + } + } + } + else if( itype == InitType.ROW_OBJ ) { + Object[] row = new Object[lschema.size()]; + for( int i=0; i<rows; i++ ) { + for( int j=0; j<lschema.size(); j++ ) + A[i][j] = fromObject(lschema.get(j), row[j] = toObject(lschema.get(j), A[i][j])); + frame.appendRow(row); + } + } + else if( itype == InitType.ROW_STRING ) { + String[] row = new String[lschema.size()]; + for( int i=0; i<rows; i++ ) { + for( int j=0; j<lschema.size(); j++ ) { + Object obj = toObject(lschema.get(j), A[i][j]); + A[i][j] = fromObject(lschema.get(j), obj); + row[j] = obj.toString(); + } + frame.appendRow(row); + } + } + + //some updates via set + for( int i=7; i<13; i++ ) + for( int j=0; j<=2; j++ ) { + frame.set(i, j, toObject(lschema.get(j), (double)i*j)); + A[i][j] = (double)i*j; + } + + //check basic meta data + if( frame.getNumRows() != rows ) + Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+rows); + + //check correct values + for( int i=0; i<rows; i++ ) + for( int j=0; j<lschema.size(); j++ ) { + double tmp = fromObject(lschema.get(j), frame.get(i, j)); + if( tmp != A[i][j] ) + 
Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]); + } + } + catch(Exception ex) { + ex.printStackTrace(); + throw new RuntimeException(ex); + } + } + + /** + * + * @param vt + * @param in + * @return + */ + private Object toObject(ValueType vt, double in) { + switch( vt ) { + case STRING: return String.valueOf(in); + case BOOLEAN: return (in!=0); + case INT: return UtilFunctions.toLong(in); + case DOUBLE: return in; + default: throw new RuntimeException("Unsupported value type: "+vt); + } + } + + /** + * + * @param vt + * @param in + * @return + */ + private double fromObject(ValueType vt, Object in) { + switch( vt ) { + case STRING: return Double.parseDouble((String)in); + case BOOLEAN: return ((Boolean)in)?1d:0d; + case INT: return (Long)in; + case DOUBLE: return (Double)in; + default: throw new RuntimeException("Unsupported value type: "+vt); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1371ab4c/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameSerializationTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameSerializationTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameSerializationTest.java new file mode 100644 index 0000000..2481599 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameSerializationTest.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.frame; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.Arrays; +import java.util.List; + +import org.apache.sysml.parser.Expression.ValueType; +import org.apache.sysml.runtime.matrix.data.FrameBlock; +import org.apache.sysml.runtime.util.UtilFunctions; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.utils.TestUtils; +import org.junit.Assert; +import org.junit.Test; + +public class FrameSerializationTest extends AutomatedTestBase +{ + private final static int rows = 2791; + private final static ValueType[] schemaStrings = new ValueType[]{ValueType.STRING, ValueType.STRING, ValueType.STRING}; + private final static ValueType[] schemaMixed = new ValueType[]{ValueType.STRING, ValueType.DOUBLE, ValueType.INT, ValueType.BOOLEAN}; + + private enum SerType { + WRITABLE_SER, + JAVA_SER, + } + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + } + + @Test + public void testFrameStringsWritable() { + runFrameSerializeTest(schemaStrings, SerType.WRITABLE_SER); + } + + @Test + public void testFrameMixedWritable() { + runFrameSerializeTest(schemaMixed, SerType.WRITABLE_SER); + } + + @Test + public void testFrameStringsJava() { + runFrameSerializeTest(schemaStrings, SerType.JAVA_SER); + } + + @Test + public void testFrameMixedJava() { + 
runFrameSerializeTest(schemaMixed, SerType.JAVA_SER); + } + + + /** + * + * @param sparseM1 + * @param sparseM2 + * @param instType + */ + private void runFrameSerializeTest( ValueType[] schema, SerType stype) + { + try + { + //data generation + double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 8234); + + //init data frame + List<ValueType> lschema = Arrays.asList(schema); + FrameBlock frame = new FrameBlock(lschema); + + //init data frame + Object[] row = new Object[lschema.size()]; + for( int i=0; i<rows; i++ ) { + for( int j=0; j<lschema.size(); j++ ) + A[i][j] = fromObject(lschema.get(j), row[j] = toObject(lschema.get(j), A[i][j])); + frame.appendRow(row); + } + + //core serialization and deserialization + if( stype == SerType.WRITABLE_SER ) { + //serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + frame.write(dos); + + //deserialization + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + DataInputStream dis = new DataInputStream(bis); + frame = new FrameBlock(); + frame.readFields(dis); + } + else if( stype == SerType.JAVA_SER ) { + //serialization + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(bos); + oos.writeObject(frame); + + //deserialization + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + ObjectInputStream ois = new ObjectInputStream(bis); + frame = (FrameBlock) ois.readObject(); + } + + //check basic meta data + if( frame.getNumRows() != rows ) + Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+rows); + + //check correct values + for( int i=0; i<rows; i++ ) + for( int j=0; j<lschema.size(); j++ ) { + double tmp = fromObject(lschema.get(j), frame.get(i, j)); + if( tmp != A[i][j] ) + Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]); + } + } + catch(Exception ex) { + ex.printStackTrace(); + throw new 
RuntimeException(ex); + } + } + + /** + * + * @param vt + * @param in + * @return + */ + private Object toObject(ValueType vt, double in) { + switch( vt ) { + case STRING: return String.valueOf(in); + case BOOLEAN: return (in!=0); + case INT: return UtilFunctions.toLong(in); + case DOUBLE: return in; + default: throw new RuntimeException("Unsupported value type: "+vt); + } + } + + /** + * + * @param vt + * @param in + * @return + */ + private double fromObject(ValueType vt, Object in) { + switch( vt ) { + case STRING: return Double.parseDouble((String)in); + case BOOLEAN: return ((Boolean)in)?1d:0d; + case INT: return (Long)in; + case DOUBLE: return (Double)in; + default: throw new RuntimeException("Unsupported value type: "+vt); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1371ab4c/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java new file mode 100644 index 0000000..33b026b --- /dev/null +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/frame/ZPackageSuite.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.frame; + +import org.junit.runner.RunWith; +import org.junit.runners.Suite; + +/** Group together the tests in this package into a single suite so that the Maven build + * won't run two of them at once. */ +@RunWith(Suite.class) +@Suite.SuiteClasses({ + FrameGetSetTest.class, + FrameSerializationTest.class, +}) + + +/** This class is just a holder for the above JUnit annotations. */ +public class ZPackageSuite { + +}
