Repository: arrow
Updated Branches:
  refs/heads/master 02c32ff93 -> 8bf61d168


ARROW-697: JAVA Throw exception for record batches > 2GB

Add a test to verify that we throw a clear error message for record batches
over 2GB. This entry point is the easiest to test without adding magic bytes to
the test suite, since the length is given explicitly in its input; the other
public entry points for deserialization perform the same checks (on values
extracted from the metadata).

Author: Holden Karau <[email protected]>

Closes #597 from holdenk/ARROW-697-java-raise-exception-for-large-batch-size 
and squashes the following commits:

d2d6b3d [Holden Karau] Merge branch 'master' into 
ARROW-697-java-raise-exception-for-large-batch-size
d56daab [Holden Karau] Throw IOException if record batch length, node length, 
or null count are larger than Int.MAX_VALUE
0a96b74 [Holden Karau] Add a test to verify that we throw a clear error message 
for record batches over 2GB in size


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8bf61d16
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8bf61d16
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8bf61d16

Branch: refs/heads/master
Commit: 8bf61d1682b883a7a538678f7f3c68dc06bb758d
Parents: 02c32ff
Author: Holden Karau <[email protected]>
Authored: Wed Apr 26 15:14:49 2017 -0400
Committer: Wes McKinney <[email protected]>
Committed: Wed Apr 26 15:14:49 2017 -0400

----------------------------------------------------------------------
 .../arrow/vector/stream/MessageSerializer.java    | 10 +++++++++-
 .../vector/stream/MessageSerializerTest.java      | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/8bf61d16/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
index ec7e0f2..228ab61 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java
@@ -201,12 +201,17 @@ public class MessageSerializer {
 
   // Deserializes a record batch given the Flatbuffer metadata and in-memory 
body
   private static ArrowRecordBatch deserializeRecordBatch(RecordBatch 
recordBatchFB,
-      ArrowBuf body) {
+      ArrowBuf body) throws IOException {
     // Now read the body
     int nodesLength = recordBatchFB.nodesLength();
     List<ArrowFieldNode> nodes = new ArrayList<>();
     for (int i = 0; i < nodesLength; ++i) {
       FieldNode node = recordBatchFB.nodes(i);
+      if ((int)node.length() != node.length() ||
+          (int)node.nullCount() != node.nullCount()) {
+        throw new IOException("Cannot currently deserialize record batches 
with " +
+                              "node length larger than Int.MAX_VALUE");
+      }
       nodes.add(new ArrowFieldNode((int)node.length(), (int)node.nullCount()));
     }
     List<ArrowBuf> buffers = new ArrayList<>();
@@ -215,6 +220,9 @@ public class MessageSerializer {
       ArrowBuf vectorBuffer = body.slice((int)bufferFB.offset(), 
(int)bufferFB.length());
       buffers.add(vectorBuffer);
     }
+    if ((int)recordBatchFB.length() != recordBatchFB.length()) {
+      throw new IOException("Cannot currently deserialize record batches over 
2GB");
+    }
     ArrowRecordBatch arrowRecordBatch =
         new ArrowRecordBatch((int)recordBatchFB.length(), nodes, buffers);
     body.release();

http://git-wip-us.apache.org/repos/asf/arrow/blob/8bf61d16/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
index d3d49d5..27879ef 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java
@@ -31,6 +31,7 @@ import java.util.List;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.file.ArrowBlock;
 import org.apache.arrow.vector.file.ReadChannel;
 import org.apache.arrow.vector.file.WriteChannel;
 import org.apache.arrow.vector.schema.ArrowFieldNode;
@@ -41,6 +42,8 @@ import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.junit.Test;
+import org.junit.Rule;
+import org.junit.rules.ExpectedException;
 
 import io.netty.buffer.ArrowBuf;
 
@@ -87,6 +90,21 @@ public class MessageSerializerTest {
     assertEquals(schema, deserialized);
   }
 
+  @Rule
+  public ExpectedException expectedEx = ExpectedException.none();
+
+  @Test
+  public void testdeSerializeRecordBatchLongMetaData() throws IOException {
+    expectedEx.expect(IOException.class);
+    expectedEx.expectMessage("Cannot currently deserialize record batches over 
2GB");
+    int offset = 0;
+    int metadataLength = 1;
+    long bodyLength = Integer.MAX_VALUE + 10L;
+    ArrowBlock block = new ArrowBlock(offset, metadataLength, bodyLength);
+    long totalLen = block.getMetadataLength() + block.getBodyLength();
+    MessageSerializer.deserializeRecordBatch(null, block, null);
+  }
+
   @Test
   public void testSerializeRecordBatch() throws IOException {
     byte[] validity = new byte[] { (byte)255, 0};

Reply via email to