Repository: systemml
Updated Branches:
  refs/heads/master 152eba1a7 -> 36effc54d


[SYSTEMML-1749] Fix result correctness of single-threaded csv frame reader

This patch fixes the single-threaded csv frame reader, which produced
incorrect results when the input was read as multiple splits. The
underlying problem was that the contents of each split were written
starting at row position 0. We now correctly maintain the current row
position across splits.

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9e87f00a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9e87f00a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9e87f00a

Branch: refs/heads/master
Commit: 9e87f00ac94e6d368e968cf8ac183d1afa02d65b
Parents: 152eba1
Author: Matthias Boehm <[email protected]>
Authored: Thu Jul 6 20:58:00 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Jul 7 11:25:30 2017 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/runtime/io/FrameReaderTextCSV.java     | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/9e87f00a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java 
b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java
index 9e10f2c..76da0de 100644
--- a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java
+++ b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java
@@ -113,11 +113,12 @@ public class FrameReaderTextCSV extends FrameReader
                informat.configure(job);
                InputSplit[] splits = informat.getSplits(job, 1);
                splits = IOUtilFunctions.sortInputSplits(splits);
-               for( int i=0; i<splits.length; i++ )
-                       readCSVFrameFromInputSplit(splits[i], informat, job, 
dest, schema, names, rlen, clen, 0, i==0);
+               for( int i=0, rpos=0; i<splits.length; i++ )
+                       rpos = readCSVFrameFromInputSplit(splits[i], informat,
+                               job, dest, schema, names, rlen, clen, rpos, 
i==0);
        }
 
-       protected final void readCSVFrameFromInputSplit( InputSplit split, 
InputFormat<LongWritable,Text> informat, JobConf job, 
+       protected final int readCSVFrameFromInputSplit( InputSplit split, 
InputFormat<LongWritable,Text> informat, JobConf job, 
                        FrameBlock dest, ValueType[] schema, String[] names, 
long rlen, long clen, int rl, boolean first)
                throws IOException
        {
@@ -184,6 +185,8 @@ public class FrameReaderTextCSV extends FrameReader
                finally {
                        IOUtilFunctions.closeSilently(reader);
                }
+               
+               return row;
        }
 
        protected Pair<Integer,Integer> computeCSVSize( Path path, JobConf job, 
FileSystem fs) 

Reply via email to