Repository: systemml Updated Branches: refs/heads/master 152eba1a7 -> 36effc54d
[SYSTEMML-1749] Fix result correctness of single-threaded CSV frame reader This patch fixes the single-threaded CSV frame reader, which produced incorrect results if the record reader in use returned multiple splits. The underlying problem was that the contents of each split were written starting at row position 0. We now correctly maintain the current row position across splits. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9e87f00a Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9e87f00a Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9e87f00a Branch: refs/heads/master Commit: 9e87f00ac94e6d368e968cf8ac183d1afa02d65b Parents: 152eba1 Author: Matthias Boehm <[email protected]> Authored: Thu Jul 6 20:58:00 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Jul 7 11:25:30 2017 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/runtime/io/FrameReaderTextCSV.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/9e87f00a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java index 9e10f2c..76da0de 100644 --- a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java +++ b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCSV.java @@ -113,11 +113,12 @@ public class FrameReaderTextCSV extends FrameReader informat.configure(job); InputSplit[] splits = informat.getSplits(job, 1); splits = IOUtilFunctions.sortInputSplits(splits); - for( int i=0; i<splits.length; i++ ) - readCSVFrameFromInputSplit(splits[i], informat, job, dest, schema, 
names, rlen, clen, 0, i==0); + for( int i=0, rpos=0; i<splits.length; i++ ) + rpos = readCSVFrameFromInputSplit(splits[i], informat, + job, dest, schema, names, rlen, clen, rpos, i==0); } - protected final void readCSVFrameFromInputSplit( InputSplit split, InputFormat<LongWritable,Text> informat, JobConf job, + protected final int readCSVFrameFromInputSplit( InputSplit split, InputFormat<LongWritable,Text> informat, JobConf job, FrameBlock dest, ValueType[] schema, String[] names, long rlen, long clen, int rl, boolean first) throws IOException { @@ -184,6 +185,8 @@ public class FrameReaderTextCSV extends FrameReader finally { IOUtilFunctions.closeSilently(reader); } + + return row; } protected Pair<Integer,Integer> computeCSVSize( Path path, JobConf job, FileSystem fs)
