Repository: incubator-systemml
Updated Branches:
  refs/heads/master 51c4291ab -> 172bfcacc


[SYSTEMML-573] Fix meta data handling of textcell frame readers/writers

This commit also includes fixes for meta data handling on frame block
merge, slice, and deep copy.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/12f2da9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/12f2da9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/12f2da9f

Branch: refs/heads/master
Commit: 12f2da9f007ea97db566bc12ed95e13c590bc8c2
Parents: 51c4291
Author: Matthias Boehm <[email protected]>
Authored: Thu Jul 7 01:04:18 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Jul 8 10:59:13 2016 -0700

----------------------------------------------------------------------
 .../functions/ConvertFrameBlockToIJVLines.java  |  9 ++++++
 .../sysml/runtime/io/FrameReaderTextCell.java   | 17 +++++++---
 .../sysml/runtime/io/FrameWriterTextCell.java   | 11 +++++++
 .../sysml/runtime/matrix/data/FrameBlock.java   | 28 ++++++++++------
 .../matrix/mapred/FrameReblockBuffer.java       | 13 +++++---
 .../functions/frame/FrameMetaReadWriteTest.java | 34 +++++++++-----------
 6 files changed, 76 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/12f2da9f/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/ConvertFrameBlockToIJVLines.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/ConvertFrameBlockToIJVLines.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/ConvertFrameBlockToIJVLines.java
index 3c50668..792e0b6 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/ConvertFrameBlockToIJVLines.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/ConvertFrameBlockToIJVLines.java
@@ -40,6 +40,15 @@ public class ConvertFrameBlockToIJVLines implements 
FlatMapFunction<Tuple2<Long,
                
                ArrayList<String> cells = new ArrayList<String>();
                
+               //write frame meta data
+               if( rowoffset == 1 ) {
+                       for( int j=0; j<block.getNumColumns(); j++ )
+                               if( !block.isColumnMetadataDefault(j) ) {
+                                       cells.add("-1 " + (j+1) + " " + 
block.getColumnMetadata(j).getNumDistinct());
+                                       cells.add("-2 " + (j+1) + " " + 
block.getColumnMetadata(j).getMvValue());
+                               }
+               }
+               
                //convert frame block to list of ijv cell triples
                StringBuilder sb = new StringBuilder();
                Iterator<String[]> iter = block.getStringRowIterator();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/12f2da9f/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java 
b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
index 8345b47..e7efc2b 100644
--- a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
+++ b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
@@ -182,7 +182,12 @@ public class FrameReaderTextCell extends FrameReader
                                st.reset( value.toString() ); //reinit tokenizer
                                row = st.nextInt()-1;
                                col = st.nextInt()-1;
-                               dest.set(row, col, 
UtilFunctions.stringToObject(schema.get(col), st.nextToken()));
+                               if( row == -3 )
+                                       
dest.getColumnMetadata(col).setMvValue(st.nextToken());
+                               else if( row == -2 )
+                                       
dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
+                               else
+                                       dest.set(row, col, 
UtilFunctions.stringToObject(schema.get(col), st.nextToken()));
                        }
                }
                catch(Exception ex) 
@@ -253,8 +258,13 @@ public class FrameReaderTextCell extends FrameReader
                        while( (value=br.readLine())!=null ) {
                                st.reset( value ); //reinit tokenizer
                                row = st.nextInt()-1;
-                               col = st.nextInt()-1;   
-                               dest.set(row, col, 
UtilFunctions.stringToObject(schema.get(col), st.nextToken()));
+                               col = st.nextInt()-1;
+                               if( row == -3 )
+                                       
dest.getColumnMetadata(col).setMvValue(st.nextToken());
+                               else if (row == -2)
+                                       
dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
+                               else
+                                       dest.set(row, col, 
UtilFunctions.stringToObject(schema.get(col), st.nextToken()));
                        }
                }
                catch(Exception ex)
@@ -272,5 +282,4 @@ public class FrameReaderTextCell extends FrameReader
                        IOUtilFunctions.closeSilently(br);
                }
        }
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/12f2da9f/src/main/java/org/apache/sysml/runtime/io/FrameWriterTextCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/FrameWriterTextCell.java 
b/src/main/java/org/apache/sysml/runtime/io/FrameWriterTextCell.java
index 38348ad..d257b69 100644
--- a/src/main/java/org/apache/sysml/runtime/io/FrameWriterTextCell.java
+++ b/src/main/java/org/apache/sysml/runtime/io/FrameWriterTextCell.java
@@ -110,6 +110,17 @@ public class FrameWriterTextCell extends FrameWriter
                        //for obj reuse and preventing repeated buffer 
re-allocations
                        StringBuilder sb = new StringBuilder();
                        
+                       //write frame meta data
+                       if( rl == 0 ) {
+                               for( int j=0; j<cols; j++ )
+                                       if( !src.isColumnMetadataDefault(j) ) {
+                                               sb.append("-1 " + (j+1) + " " + 
src.getColumnMetadata(j).getNumDistinct() + "\n");
+                                               sb.append("-2 " + (j+1) + " " + 
src.getColumnMetadata(j).getMvValue() + "\n");
+                                               br.write( sb.toString() );
+                                               sb.setLength(0);
+                                       }
+                       }
+                       
                        //write frame row range to output
                        Iterator<String[]> iter = src.getStringRowIterator(rl, 
ru);
                        for( int i=rl; iter.hasNext(); i++ ) { //for all rows

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/12f2da9f/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index d5787ca..2088e85 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -83,8 +83,9 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
        }
        
        public FrameBlock(FrameBlock that) {
-               this(that.getSchema());
+               this(that.getSchema(), that.getColumnNames());
                copy(that);
+               setColumnMetadata(that.getColumnMetadata());
        }
        
        public FrameBlock(int ncols, ValueType vt) {
@@ -206,7 +207,7 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
         * @param colmeta
         */
        public void setColumnMetadata(List<ColumnMetadata> colmeta) {
-               _colmeta = colmeta;
+               _colmeta = new ArrayList<FrameBlock.ColumnMetadata>(colmeta);
        }
        
        /**
@@ -330,7 +331,8 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
                getColumnNames().clear();
                if( _colmeta != null ) {
                        for( int i=0; i<_colmeta.size(); i++ )
-                               _colmeta.get(i).reset();
+                               if( !isColumnMetadataDefault(i) )
+                                       _colmeta.set(i, new ColumnMetadata(0));
                }
                if(_coldata != null) {
                        for( int i=0; i < _coldata.size(); i++ )
@@ -888,6 +890,13 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
                if ( getNumRows() != that.getNumRows() || getNumColumns() != 
that.getNumColumns() )
                        throw new DMLRuntimeException("Dimension mismatch on 
merge disjoint (target="+getNumRows()+"x"+getNumColumns()+", 
source="+that.getNumRows()+"x"+that.getNumColumns()+")");
                
+               //meta data copy if necessary
+               for( int j=0; j<getNumColumns(); j++ )
+                       if( !that.isColumnMetadataDefault(j) ) {
+                               
_colmeta.get(j).setNumDistinct(that._colmeta.get(j).getNumDistinct());
+                               
_colmeta.get(j).setMvValue(that._colmeta.get(j).getMvValue());
+                       }
+               
                //core frame block merge through cell copy
                for( int i=0; i<that.getNumRows(); i++ ) {
                        for( int j=0; j<getNumColumns(); j++ ) {
@@ -896,7 +905,6 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
                                        set(i, j,obj);
                        }
                }
-               
        }
        
        /**
@@ -1252,16 +1260,16 @@ public class FrameBlock implements Writable, 
CacheBlock, Externalizable
                private long _ndistinct = 0;
                private String _mvValue = null;
                
-               public ColumnMetadata(long ndistinct, String mvval) {
+               public ColumnMetadata(long ndistinct) {
                        _ndistinct = ndistinct;
-                       _mvValue = mvval;
                }
-               public ColumnMetadata(long ndistinct) {
+               public ColumnMetadata(long ndistinct, String mvval) {
                        _ndistinct = ndistinct;
+                       _mvValue = mvval;
                }
-               public void reset() {
-                       _ndistinct = 0;
-                       _mvValue = null;
+               public ColumnMetadata(ColumnMetadata that) {
+                       _ndistinct = that._ndistinct;
+                       _mvValue = that._mvValue;
                }
                
                public long getNumDistinct() {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/12f2da9f/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java 
b/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
index 2762a0d..9fd9e36 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
@@ -218,7 +218,8 @@ public class FrameReblockBuffer
                long cbi = -1, cbj = -1; //current block indexes
                for( int i=0; i<_count; i++ )
                {
-                       long bi = 
UtilFunctions.computeBlockIndex(_buff[i].getRow(), _brlen);
+                       //compute block indexes (w/ robustness for meta data 
handling)
+                       long bi = 
Math.max(UtilFunctions.computeBlockIndex(_buff[i].getRow(), _brlen), 1);
                        long bj = 
UtilFunctions.computeBlockIndex(_buff[i].getCol(), _bclen);
                        
                        //output block and switch to next index pair
@@ -229,13 +230,17 @@ public class FrameReblockBuffer
                                cbj = bj;                                       
                                tmpIx = (bi-1)*_brlen+1;
                                tmpBlock = new FrameBlock(_schema);
-                               
tmpBlock.ensureAllocatedColumns(Math.min(_brlen, (int)(_rlen-(bi-1)*_brlen)));
-                               
+                               
tmpBlock.ensureAllocatedColumns(Math.min(_brlen, (int)(_rlen-(bi-1)*_brlen)));  
                        
                        }
                        
                        int ci = 
UtilFunctions.computeCellInBlock(_buff[i].getRow(), _brlen);
                        int cj = 
UtilFunctions.computeCellInBlock(_buff[i].getCol(), _bclen);
-                       tmpBlock.set(ci, cj, _buff[i].getObjVal());
+                       if( ci == -3 )
+                               
tmpBlock.getColumnMetadata(cj).setMvValue(_buff[i].getObjVal().toString());
+                       else if( ci == -2 )
+                               
tmpBlock.getColumnMetadata(cj).setNumDistinct(Long.parseLong(_buff[i].getObjVal().toString()));
+                       else
+                               tmpBlock.set(ci, cj, _buff[i].getObjVal());
                }
                
                //output last block 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/12f2da9f/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameMetaReadWriteTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameMetaReadWriteTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameMetaReadWriteTest.java
index c71eac6..3803015 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameMetaReadWriteTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameMetaReadWriteTest.java
@@ -55,33 +55,31 @@ public class FrameMetaReadWriteTest extends 
AutomatedTestBase
                runFrameReadWriteTest(OutputInfo.BinaryBlockOutputInfo, 
ExecType.CP);
        }
 
+       @Test
+       public void testFrameBinarySpark()  {
+               runFrameReadWriteTest(OutputInfo.BinaryBlockOutputInfo, 
ExecType.SPARK);
+       }
+       
+       @Test
+       public void testFrameTextcellCP()  {
+               runFrameReadWriteTest(OutputInfo.TextCellOutputInfo, 
ExecType.CP);
+       }
+       
+       @Test
+       public void testFrameTextcellSpark()  {
+               runFrameReadWriteTest(OutputInfo.TextCellOutputInfo, 
ExecType.SPARK);
+       }
+       
 // TODO: add meta data support for text formats (requires consolidation with 
file-based transform first)       
 //     @Test
 //     public void testFrameCsvCP()  {
 //             runFrameReadWriteTest(OutputInfo.CSVOutputInfo, ExecType.CP);
 //     }
-//     
-//     @Test
-//     public void testFrameTextcellCP()  {
-//             runFrameReadWriteTest(OutputInfo.TextCellOutputInfo, 
ExecType.CP);
-//     }
-       
-       @Test
-       public void testFrameBinarySpark()  {
-               runFrameReadWriteTest(OutputInfo.BinaryBlockOutputInfo, 
ExecType.SPARK);
-       }
-
-// TODO: add meta data support for text formats (requires consolidation with 
file-based transform first)
+//
 //     @Test
 //     public void testFrameCsvSpark()  {
 //             runFrameReadWriteTest(OutputInfo.CSVOutputInfo, ExecType.SPARK);
 //     }
-//     
-//     @Test
-//     public void testFrameTextcellSpark()  {
-//             runFrameReadWriteTest(OutputInfo.TextCellOutputInfo, 
ExecType.SPARK);
-//     }
-
        
        /**
         * 

Reply via email to