[SYSTEMML-1956] Fix robustness of frame rbind w/ mismatching schemas. This patch makes the frame rbind more robust by allowing graceful schema conversions. We now try - in a best-effort manner - to convert the values of the second input into the types of the first input, which determine the schema of the output.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/deb4baf0 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/deb4baf0 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/deb4baf0 Branch: refs/heads/master Commit: deb4baf06c3a9d204523dc868f72ea23e307f4c4 Parents: 92bad9e Author: Matthias Boehm <[email protected]> Authored: Wed Oct 11 17:12:31 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu Oct 12 01:13:07 2017 -0700 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/FrameBlock.java | 31 ++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/deb4baf0/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java index 6079956..23dbd8b 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java @@ -554,6 +554,20 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable } /** + * Get a row iterator over the frame where all fields are encoded + * as boxed objects according to the value types of the provided + * target schema. + * + * @param schema target schema of objects + * @return object array iterator + */ + public Iterator<Object[]> getObjectRowIterator(ValueType[] schema) { + ObjectRowIterator iter = new ObjectRowIterator(0, _numRows); + iter.setSchema(schema); + return iter; + } + + /** * Get a row iterator over the frame where all selected fields are * encoded as boxed objects according to their value types. 
* @@ -992,7 +1006,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable ret._coldata = new Array[getNumColumns()]; for( int j=0; j<getNumColumns(); j++ ) ret._coldata[j] = _coldata[j].clone(); - Iterator<Object[]> iter = that.getObjectRowIterator(); + Iterator<Object[]> iter = that.getObjectRowIterator(_schema); while( iter.hasNext() ) ret.appendRow(iter.next()); } @@ -1221,6 +1235,8 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable } private class ObjectRowIterator extends RowIterator<Object> { + private ValueType[] _tgtSchema = null; + public ObjectRowIterator(int rl, int ru) { super(rl, ru); } @@ -1229,6 +1245,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable super(rl, ru, cols); } + public void setSchema(ValueType[] schema) { + _tgtSchema = schema; + } + @Override protected Object[] createRow(int size) { return new Object[size]; @@ -1237,10 +1257,17 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable @Override public Object[] next( ) { for( int j=0; j<_cols.length; j++ ) - _curRow[j] = get(_curPos, _cols[j]-1); + _curRow[j] = getValue(_curPos, _cols[j]-1); _curPos++; return _curRow; } + + private Object getValue(int i, int j) { + Object val = get(i, j); + if( _tgtSchema != null ) + val = UtilFunctions.objectToObject(_tgtSchema[j], val); + return val; + } } ///////
