Repository: orc
Updated Branches:
  refs/heads/branch-1.4 daefe685e -> e8c21fd38


ORC-285. Empty vector batches of floats or doubles get java.io.EOFException

Fixes #205

Signed-off-by: Owen O'Malley <omal...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/9d3434fe
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/9d3434fe
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/9d3434fe

Branch: refs/heads/branch-1.4
Commit: 9d3434fe841531483497bf9721d3fc00e958fc8a
Parents: daefe68
Author: Owen O'Malley <omal...@apache.org>
Authored: Wed Dec 27 09:13:50 2017 -0800
Committer: Owen O'Malley <omal...@apache.org>
Committed: Tue Jan 23 15:13:55 2018 -0800

----------------------------------------------------------------------
 .../org/apache/orc/impl/TreeReaderFactory.java  | 125 ++++++++++---------
 .../test/org/apache/orc/TestVectorOrcFile.java  |  35 ++++++
 2 files changed, 99 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/9d3434fe/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 4b369af..9649be9 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -640,40 +640,42 @@ public class TreeReaderFactory {
       final boolean hasNulls = !result.noNulls;
       boolean allNulls = hasNulls;
 
-      if (hasNulls) {
-        // conditions to ensure bounds checks skips
-        for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
-          allNulls = allNulls & result.isNull[i];
-        }
-        if (allNulls) {
-          result.vector[0] = Double.NaN;
-          result.isRepeating = true;
-        } else {
-          // some nulls
-          result.isRepeating = false;
+      if (batchSize > 0) {
+        if (hasNulls) {
           // conditions to ensure bounds checks skips
-          for (int i = 0; batchSize <= result.isNull.length
-              && batchSize <= result.vector.length && i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.vector[i] = utils.readFloat(stream);
-            } else {
-              // If the value is not present then set NaN
-              result.vector[i] = Double.NaN;
+          for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
+            allNulls = allNulls & result.isNull[i];
+          }
+          if (allNulls) {
+            result.vector[0] = Double.NaN;
+            result.isRepeating = true;
+          } else {
+            // some nulls
+            result.isRepeating = false;
+            // conditions to ensure bounds checks skips
+            for (int i = 0; batchSize <= result.isNull.length
+                && batchSize <= result.vector.length && i < batchSize; i++) {
+              if (!result.isNull[i]) {
+                result.vector[i] = utils.readFloat(stream);
+              } else {
+                // If the value is not present then set NaN
+                result.vector[i] = Double.NaN;
+              }
             }
           }
+        } else {
+          // no nulls & > 1 row (check repeating)
+          boolean repeating = (batchSize > 1);
+          final float f1 = utils.readFloat(stream);
+          result.vector[0] = f1;
+          // conditions to ensure bounds checks skips
+          for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+            final float f2 = utils.readFloat(stream);
+            repeating = repeating && (f1 == f2);
+            result.vector[i] = f2;
+          }
+          result.isRepeating = repeating;
         }
-      } else {
-        // no nulls & > 1 row (check repeating)
-        boolean repeating = (batchSize > 1);
-        final float f1 = utils.readFloat(stream);
-        result.vector[0] = f1;
-        // conditions to ensure bounds checks skips
-        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
-          final float f2 = utils.readFloat(stream);
-          repeating = repeating && (f1 == f2);
-          result.vector[i] = f2;
-        }
-        result.isRepeating = repeating;
       }
     }
 
@@ -733,41 +735,42 @@ public class TreeReaderFactory {
 
       final boolean hasNulls = !result.noNulls;
       boolean allNulls = hasNulls;
-
-      if (hasNulls) {
-        // conditions to ensure bounds checks skips
-        for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
-          allNulls = allNulls & result.isNull[i];
-        }
-        if (allNulls) {
-          result.vector[0] = Double.NaN;
-          result.isRepeating = true;
-        } else {
-          // some nulls
-          result.isRepeating = false;
+      if (batchSize != 0) {
+        if (hasNulls) {
           // conditions to ensure bounds checks skips
-          for (int i = 0; batchSize <= result.isNull.length
-              && batchSize <= result.vector.length && i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.vector[i] = utils.readDouble(stream);
-            } else {
-              // If the value is not present then set NaN
-              result.vector[i] = Double.NaN;
+          for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
+            allNulls = allNulls & result.isNull[i];
+          }
+          if (allNulls) {
+            result.vector[0] = Double.NaN;
+            result.isRepeating = true;
+          } else {
+            // some nulls
+            result.isRepeating = false;
+            // conditions to ensure bounds checks skips
+            for (int i = 0; batchSize <= result.isNull.length
+                && batchSize <= result.vector.length && i < batchSize; i++) {
+              if (!result.isNull[i]) {
+                result.vector[i] = utils.readDouble(stream);
+              } else {
+                // If the value is not present then set NaN
+                result.vector[i] = Double.NaN;
+              }
             }
           }
+        } else {
+          // no nulls
+          boolean repeating = (batchSize > 1);
+          final double d1 = utils.readDouble(stream);
+          result.vector[0] = d1;
+          // conditions to ensure bounds checks skips
+          for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+            final double d2 = utils.readDouble(stream);
+            repeating = repeating && (d1 == d2);
+            result.vector[i] = d2;
+          }
+          result.isRepeating = repeating;
         }
-      } else {
-        // no nulls
-        boolean repeating = (batchSize > 1);
-        final double d1 = utils.readDouble(stream);
-        result.vector[0] = d1;
-        // conditions to ensure bounds checks skips
-        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
-          final double d2 = utils.readDouble(stream);
-          repeating = repeating && (d1 == d2);
-          result.vector[i] = d2;
-        }
-        result.isRepeating = repeating;
       }
     }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/9d3434fe/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index bb4e3a9..4ca4a40 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3248,4 +3248,39 @@ public class TestVectorOrcFile {
       assertEquals(OrcProto.CompressionKind.NONE, ps.getCompression());
     }
   }
+
+  @Test
+  public void testEmptyDoubleStream() throws Exception {
+    TypeDescription schema =
+        TypeDescription.fromString("struct<list1:array<double>," +
+            "list2:array<float>>");
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 2;
+    ListColumnVector list1 = (ListColumnVector) batch.cols[0];
+    ListColumnVector list2 = (ListColumnVector) batch.cols[1];
+    for(int r=0; r < batch.size; ++r) {
+      list1.offsets[r] = 0;
+      list1.lengths[r] = 0;
+      list2.offsets[r] = 0;
+      list2.lengths[r] = 0;
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    assertTrue(rows.nextBatch(batch));
+    assertEquals(2, batch.size);
+    list1 = (ListColumnVector) batch.cols[0];
+    list2 = (ListColumnVector) batch.cols[1];
+    for(int r=0; r < batch.size; ++r) {
+      assertEquals(0, list1.lengths[r]);
+      assertEquals(0, list2.lengths[r]);
+    }
+    assertFalse(rows.nextBatch(batch));
+    rows.close();
+  }
 }

Reply via email to