Repository: orc
Updated Branches:
  refs/heads/branch-1.4 daefe685e -> e8c21fd38
ORC-285. Empty vector batches of floats or doubles get java.io.EOFException Fixes #205 Signed-off-by: Owen O'Malley <omal...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/orc/repo Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/9d3434fe Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/9d3434fe Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/9d3434fe Branch: refs/heads/branch-1.4 Commit: 9d3434fe841531483497bf9721d3fc00e958fc8a Parents: daefe68 Author: Owen O'Malley <omal...@apache.org> Authored: Wed Dec 27 09:13:50 2017 -0800 Committer: Owen O'Malley <omal...@apache.org> Committed: Tue Jan 23 15:13:55 2018 -0800 ---------------------------------------------------------------------- .../org/apache/orc/impl/TreeReaderFactory.java | 125 ++++++++++--------- .../test/org/apache/orc/TestVectorOrcFile.java | 35 ++++++ 2 files changed, 99 insertions(+), 61 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/orc/blob/9d3434fe/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java index 4b369af..9649be9 100644 --- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java @@ -640,40 +640,42 @@ public class TreeReaderFactory { final boolean hasNulls = !result.noNulls; boolean allNulls = hasNulls; - if (hasNulls) { - // conditions to ensure bounds checks skips - for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) { - allNulls = allNulls & result.isNull[i]; - } - if (allNulls) { - result.vector[0] = Double.NaN; - result.isRepeating = true; - } else { - // some nulls - result.isRepeating = false; + if (batchSize > 0) { + if (hasNulls) { // conditions to ensure bounds 
checks skips - for (int i = 0; batchSize <= result.isNull.length - && batchSize <= result.vector.length && i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readFloat(stream); - } else { - // If the value is not present then set NaN - result.vector[i] = Double.NaN; + for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) { + allNulls = allNulls & result.isNull[i]; + } + if (allNulls) { + result.vector[0] = Double.NaN; + result.isRepeating = true; + } else { + // some nulls + result.isRepeating = false; + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length + && batchSize <= result.vector.length && i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readFloat(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } } } + } else { + // no nulls & > 1 row (check repeating) + boolean repeating = (batchSize > 1); + final float f1 = utils.readFloat(stream); + result.vector[0] = f1; + // conditions to ensure bounds checks skips + for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { + final float f2 = utils.readFloat(stream); + repeating = repeating && (f1 == f2); + result.vector[i] = f2; + } + result.isRepeating = repeating; } - } else { - // no nulls & > 1 row (check repeating) - boolean repeating = (batchSize > 1); - final float f1 = utils.readFloat(stream); - result.vector[0] = f1; - // conditions to ensure bounds checks skips - for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { - final float f2 = utils.readFloat(stream); - repeating = repeating && (f1 == f2); - result.vector[i] = f2; - } - result.isRepeating = repeating; } } @@ -733,41 +735,42 @@ public class TreeReaderFactory { final boolean hasNulls = !result.noNulls; boolean allNulls = hasNulls; - - if (hasNulls) { - // conditions to ensure bounds checks skips - for (int i = 0; i < batchSize && batchSize <= 
result.isNull.length; i++) { - allNulls = allNulls & result.isNull[i]; - } - if (allNulls) { - result.vector[0] = Double.NaN; - result.isRepeating = true; - } else { - // some nulls - result.isRepeating = false; + if (batchSize != 0) { + if (hasNulls) { // conditions to ensure bounds checks skips - for (int i = 0; batchSize <= result.isNull.length - && batchSize <= result.vector.length && i < batchSize; i++) { - if (!result.isNull[i]) { - result.vector[i] = utils.readDouble(stream); - } else { - // If the value is not present then set NaN - result.vector[i] = Double.NaN; + for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) { + allNulls = allNulls & result.isNull[i]; + } + if (allNulls) { + result.vector[0] = Double.NaN; + result.isRepeating = true; + } else { + // some nulls + result.isRepeating = false; + // conditions to ensure bounds checks skips + for (int i = 0; batchSize <= result.isNull.length + && batchSize <= result.vector.length && i < batchSize; i++) { + if (!result.isNull[i]) { + result.vector[i] = utils.readDouble(stream); + } else { + // If the value is not present then set NaN + result.vector[i] = Double.NaN; + } } } + } else { + // no nulls + boolean repeating = (batchSize > 1); + final double d1 = utils.readDouble(stream); + result.vector[0] = d1; + // conditions to ensure bounds checks skips + for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { + final double d2 = utils.readDouble(stream); + repeating = repeating && (d1 == d2); + result.vector[i] = d2; + } + result.isRepeating = repeating; } - } else { - // no nulls - boolean repeating = (batchSize > 1); - final double d1 = utils.readDouble(stream); - result.vector[0] = d1; - // conditions to ensure bounds checks skips - for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) { - final double d2 = utils.readDouble(stream); - repeating = repeating && (d1 == d2); - result.vector[i] = d2; - } - result.isRepeating = repeating; } } 
http://git-wip-us.apache.org/repos/asf/orc/blob/9d3434fe/java/core/src/test/org/apache/orc/TestVectorOrcFile.java ---------------------------------------------------------------------- diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index bb4e3a9..4ca4a40 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -3248,4 +3248,39 @@ public class TestVectorOrcFile { assertEquals(OrcProto.CompressionKind.NONE, ps.getCompression()); } } + + @Test + public void testEmptyDoubleStream() throws Exception { + TypeDescription schema = + TypeDescription.fromString("struct<list1:array<double>," + + "list2:array<float>>"); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).setSchema(schema)); + VectorizedRowBatch batch = schema.createRowBatch(); + batch.size = 2; + ListColumnVector list1 = (ListColumnVector) batch.cols[0]; + ListColumnVector list2 = (ListColumnVector) batch.cols[1]; + for(int r=0; r < batch.size; ++r) { + list1.offsets[r] = 0; + list1.lengths[r] = 0; + list2.offsets[r] = 0; + list2.lengths[r] = 0; + } + writer.addRowBatch(batch); + writer.close(); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf)); + RecordReader rows = reader.rows(); + batch = reader.getSchema().createRowBatch(); + assertTrue(rows.nextBatch(batch)); + assertEquals(2, batch.size); + list1 = (ListColumnVector) batch.cols[0]; + list2 = (ListColumnVector) batch.cols[1]; + for(int r=0; r < batch.size; ++r) { + assertEquals(0, list1.lengths[r]); + assertEquals(0, list2.lengths[r]); + } + assertFalse(rows.nextBatch(batch)); + rows.close(); + } }