Repository: arrow Updated Branches: refs/heads/master 02c32ff93 -> 8bf61d168
ARROW-697: JAVA Throw exception for record batches > 2GB Add a test to verify that we throw a clear error message for record batches over 2GB. This entry point is easiest to test without adding some magic bytes to the test suite since it is explicit about its input, and the other public entry points for deserialization have the same checks (just extracted from the metadata). Author: Holden Karau <[email protected]> Closes #597 from holdenk/ARROW-697-java-raise-exception-for-large-batch-size and squashes the following commits: d2d6b3d [Holden Karau] Merge branch 'master' into ARROW-697-java-raise-exception-for-large-batch-size d56daab [Holden Karau] Throw IOException if record batch length, node length, or null count are larger than Int.MAX_VALUE 0a96b74 [Holden Karau] Add a test to verify that we throw a clear error message for record batches over 2GB in size Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8bf61d16 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8bf61d16 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8bf61d16 Branch: refs/heads/master Commit: 8bf61d1682b883a7a538678f7f3c68dc06bb758d Parents: 02c32ff Author: Holden Karau <[email protected]> Authored: Wed Apr 26 15:14:49 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Wed Apr 26 15:14:49 2017 -0400 ---------------------------------------------------------------------- .../arrow/vector/stream/MessageSerializer.java | 10 +++++++++- .../vector/stream/MessageSerializerTest.java | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/8bf61d16/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java ---------------------------------------------------------------------- diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java index ec7e0f2..228ab61 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java @@ -201,12 +201,17 @@ public class MessageSerializer { // Deserializes a record batch given the Flatbuffer metadata and in-memory body private static ArrowRecordBatch deserializeRecordBatch(RecordBatch recordBatchFB, - ArrowBuf body) { + ArrowBuf body) throws IOException { // Now read the body int nodesLength = recordBatchFB.nodesLength(); List<ArrowFieldNode> nodes = new ArrayList<>(); for (int i = 0; i < nodesLength; ++i) { FieldNode node = recordBatchFB.nodes(i); + if ((int)node.length() != node.length() || + (int)node.nullCount() != node.nullCount()) { + throw new IOException("Cannot currently deserialize record batches with " + + "node length larger than Int.MAX_VALUE"); + } nodes.add(new ArrowFieldNode((int)node.length(), (int)node.nullCount())); } List<ArrowBuf> buffers = new ArrayList<>(); @@ -215,6 +220,9 @@ public class MessageSerializer { ArrowBuf vectorBuffer = body.slice((int)bufferFB.offset(), (int)bufferFB.length()); buffers.add(vectorBuffer); } + if ((int)recordBatchFB.length() != recordBatchFB.length()) { + throw new IOException("Cannot currently deserialize record batches over 2GB"); + } ArrowRecordBatch arrowRecordBatch = new ArrowRecordBatch((int)recordBatchFB.length(), nodes, buffers); body.release(); http://git-wip-us.apache.org/repos/asf/arrow/blob/8bf61d16/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java 
b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java index d3d49d5..27879ef 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java @@ -31,6 +31,7 @@ import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.file.ArrowBlock; import org.apache.arrow.vector.file.ReadChannel; import org.apache.arrow.vector.file.WriteChannel; import org.apache.arrow.vector.schema.ArrowFieldNode; @@ -41,6 +42,8 @@ import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; +import org.junit.Rule; +import org.junit.rules.ExpectedException; import io.netty.buffer.ArrowBuf; @@ -87,6 +90,21 @@ public class MessageSerializerTest { assertEquals(schema, deserialized); } + @Rule + public ExpectedException expectedEx = ExpectedException.none(); + + @Test + public void testdeSerializeRecordBatchLongMetaData() throws IOException { + expectedEx.expect(IOException.class); + expectedEx.expectMessage("Cannot currently deserialize record batches over 2GB"); + int offset = 0; + int metadataLength = 1; + long bodyLength = Integer.MAX_VALUE + 10L; + ArrowBlock block = new ArrowBlock(offset, metadataLength, bodyLength); + long totalLen = block.getMetadataLength() + block.getBodyLength(); + MessageSerializer.deserializeRecordBatch(null, block, null); + } + @Test public void testSerializeRecordBatch() throws IOException { byte[] validity = new byte[] { (byte)255, 0};
