Repository: arrow
Updated Branches:
  refs/heads/master 3b946b822 -> 33c731dbd


ARROW-398: Java file format requires bitmaps of all 1's to be written…

… when there are no nulls

Author: Julien Le Dem <jul...@dremio.com>

Closes #222 from julienledem/empty_buf and squashes the following commits:

c29da53 [Julien Le Dem] fix extraneous bits
4e87d88 [Julien Le Dem] ARROW-398: Java file format requires bitmaps of all 1's 
to be written when there are no nulls


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/33c731db
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/33c731db
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/33c731db

Branch: refs/heads/master
Commit: 33c731dbd69331b0d7ce0dc924791db4ca461009
Parents: 3b946b8
Author: Julien Le Dem <jul...@dremio.com>
Authored: Fri Dec 2 10:32:36 2016 -0500
Committer: Wes McKinney <wes.mckin...@twosigma.com>
Committed: Fri Dec 2 10:32:36 2016 -0500

----------------------------------------------------------------------
 .../codegen/templates/NullableValueVectors.java |  2 +-
 .../src/main/codegen/templates/UnionVector.java |  2 +-
 .../arrow/vector/BaseDataValueVector.java       |  7 +-
 .../java/org/apache/arrow/vector/BitVector.java | 36 ++++++++++
 .../org/apache/arrow/vector/BufferBacked.java   |  4 +-
 .../org/apache/arrow/vector/ValueVector.java    | 17 -----
 .../org/apache/arrow/vector/VectorLoader.java   |  1 -
 .../apache/arrow/vector/complex/ListVector.java |  2 +-
 .../arrow/vector/complex/NullableMapVector.java |  2 +-
 .../arrow/vector/TestVectorUnloadLoad.java      | 69 ++++++++++++++++++++
 .../vector/file/TestArrowReaderWriter.java      |  4 --
 11 files changed, 116 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/codegen/templates/NullableValueVectors.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java 
b/java/vector/src/main/codegen/templates/NullableValueVectors.java
index 48af7a2..716fedc 100644
--- a/java/vector/src/main/codegen/templates/NullableValueVectors.java
+++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java
@@ -144,7 +144,7 @@ public final class ${className} extends BaseDataValueVector 
implements <#if type
 
   @Override
   public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> 
ownBuffers) {
-    org.apache.arrow.vector.BaseDataValueVector.load(getFieldInnerVectors(), 
ownBuffers);
+    org.apache.arrow.vector.BaseDataValueVector.load(fieldNode, 
getFieldInnerVectors(), ownBuffers);
     bits.valueCount = fieldNode.getLength();
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/codegen/templates/UnionVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionVector.java 
b/java/vector/src/main/codegen/templates/UnionVector.java
index 5ca3f90..9608b3c 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -105,7 +105,7 @@ public class UnionVector implements FieldVector {
 
   @Override
   public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> 
ownBuffers) {
-    BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
+    BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
     this.valueCount = fieldNode.getLength();
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
index c22258d..4c6d363 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.schema.ArrowFieldNode;
 
 import io.netty.buffer.ArrowBuf;
 
@@ -29,13 +30,13 @@ public abstract class BaseDataValueVector extends 
BaseValueVector implements Buf
 
   protected final static byte[] emptyByteArray = new byte[]{}; // Nullable 
vectors use this
 
-  public static void load(List<BufferBacked> vectors, List<ArrowBuf> buffers) {
+  public static void load(ArrowFieldNode fieldNode, List<BufferBacked> 
vectors, List<ArrowBuf> buffers) {
     int expectedSize = vectors.size();
     if (buffers.size() != expectedSize) {
       throw new IllegalArgumentException("Illegal buffer count, expected " + 
expectedSize + ", got: " + buffers.size());
     }
     for (int i = 0; i < expectedSize; i++) {
-      vectors.get(i).load(buffers.get(i));
+      vectors.get(i).load(fieldNode, buffers.get(i));
     }
   }
 
@@ -106,7 +107,7 @@ public abstract class BaseDataValueVector extends 
BaseValueVector implements Buf
   }
 
   @Override
-  public void load(ArrowBuf data) {
+  public void load(ArrowFieldNode fieldNode, ArrowBuf data) {
     this.data.release();
     this.data = data.retain(allocator);
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
index c12db50..7ce1236 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -22,6 +22,7 @@ import org.apache.arrow.memory.OutOfMemoryException;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.BitHolder;
 import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.schema.ArrowFieldNode;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.util.OversizedAllocationException;
@@ -49,6 +50,41 @@ public final class BitVector extends BaseDataValueVector 
implements FixedWidthVe
   }
 
   @Override
+  public void load(ArrowFieldNode fieldNode, ArrowBuf data) {
+    // When the vector is all nulls or all defined, the content of the buffer 
can be omitted
+    if (data.readableBytes() == 0 && fieldNode.getLength() != 0) {
+      data.release();
+      int count = fieldNode.getLength();
+      allocateNew(count);
+      int n = getSizeFromCount(count);
+      if (fieldNode.getNullCount() == 0) {
+        // all defined
+        // create an all 1s buffer
+        // set full bytes
+        int fullBytesCount = count / 8;
+        for (int i = 0; i < fullBytesCount; ++i) {
+          this.data.setByte(i, 0xFF);
+        }
+        int remainder = count % 8;
+        // set remaining bits
+        if (remainder > 0) {
+          byte bitMask = (byte) (0xFFL >>> ((8 - remainder) & 7));;
+          this.data.setByte(fullBytesCount, bitMask);
+        }
+      } else if (fieldNode.getNullCount() == fieldNode.getLength()) {
+        // all null
+        // create an all 0s buffer
+        zeroVector();
+      } else {
+        throw new IllegalArgumentException("The buffer can be empty only if 
there's no data or it's all null or all defined");
+      }
+      this.data.writerIndex(n);
+    } else {
+      super.load(fieldNode, data);
+    }
+  }
+
+  @Override
   public Field getField() {
     throw new UnsupportedOperationException("internal vector");
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
index d1c262d..3c8b321 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
@@ -17,6 +17,8 @@
  */
 package org.apache.arrow.vector;
 
+import org.apache.arrow.vector.schema.ArrowFieldNode;
+
 import io.netty.buffer.ArrowBuf;
 
 /**
@@ -24,7 +26,7 @@ import io.netty.buffer.ArrowBuf;
  */
 public interface BufferBacked {
 
-  void load(ArrowBuf data);
+  void load(ArrowFieldNode fieldNode, ArrowBuf data);
 
   ArrowBuf unLoad();
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
index ba7790e..5b24a41 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
@@ -131,13 +131,6 @@ public interface ValueVector extends Closeable, 
Iterable<ValueVector> {
   FieldReader getReader();
 
   /**
-   * Get the metadata for this field. Used in serialization
-   *
-   * @return FieldMetadata for this field.
-   */
-//  SerializedField getMetadata();
-
-  /**
    * Returns the number of bytes that is used by this vector instance.
    */
   int getBufferSize();
@@ -167,16 +160,6 @@ public interface ValueVector extends Closeable, 
Iterable<ValueVector> {
   ArrowBuf[] getBuffers(boolean clear);
 
   /**
-   * Load the data provided in the buffer. Typically used when deserializing 
from the wire.
-   *
-   * @param metadata
-   *          Metadata used to decode the incoming buffer.
-   * @param buffer
-   *          The buffer that contains the ValueVector.
-   */
-//  void load(SerializedField metadata, DrillBuf buffer);
-
-  /**
    * An abstraction that is used to read from this vector instance.
    */
   interface Accessor {

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java 
b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
index c5d642e..757f061 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
@@ -81,7 +81,6 @@ public class VectorLoader {
     try {
       vector.loadFieldBuffers(fieldNode, ownBuffers);
     } catch (RuntimeException e) {
-      e.printStackTrace();
       throw new IllegalArgumentException("Could not load buffers for field " +
               field + " error message" + e.getMessage(), e);
     }

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index dd99c73..e18f99f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -93,7 +93,7 @@ public class ListVector extends BaseRepeatedValueVector 
implements FieldVector {
 
   @Override
   public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> 
ownBuffers) {
-    BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
+    BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
index 8e1bbfa..f0ddf27 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
@@ -62,7 +62,7 @@ public class NullableMapVector extends MapVector implements 
FieldVector {
 
   @Override
   public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> 
ownBuffers) {
-    BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
+    BaseDataValueVector.load(fieldNode, getFieldInnerVectors(), ownBuffers);
     this.valueCount = fieldNode.getLength();
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
index 78f69ee..9dfe8d8 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
@@ -17,7 +17,13 @@
  */
 package org.apache.arrow.vector;
 
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 import java.io.IOException;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.arrow.memory.BufferAllocator;
@@ -29,12 +35,17 @@ import 
org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
 import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
 import org.apache.arrow.vector.complex.writer.BigIntWriter;
 import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.schema.ArrowFieldNode;
 import org.apache.arrow.vector.schema.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.Test;
 
+import io.netty.buffer.ArrowBuf;
+
 public class TestVectorUnloadLoad {
 
   static final BufferAllocator allocator = new 
RootAllocator(Integer.MAX_VALUE);
@@ -88,6 +99,64 @@ public class TestVectorUnloadLoad {
     }
   }
 
+  /**
+   * The validity buffer can be empty if:
+   *  - all values are defined
+   *  - all values are null
+   * @throws IOException
+   */
+  @Test
+  public void testLoadEmptyValidityBuffer() throws IOException {
+    Schema schema = new Schema(asList(
+        new Field("intDefined", true, new ArrowType.Int(32, true), 
Collections.<Field>emptyList()),
+        new Field("intNull", true, new ArrowType.Int(32, true), 
Collections.<Field>emptyList())
+        ));
+    int count = 10;
+    ArrowBuf validity = allocator.getEmpty();
+    ArrowBuf values = allocator.buffer(count * 4); // integers
+    for (int i = 0; i < count; i++) {
+      values.setInt(i * 4, i);
+    }
+    try (
+        ArrowRecordBatch recordBatch = new ArrowRecordBatch(count, asList(new 
ArrowFieldNode(count, 0), new ArrowFieldNode(count, count)), asList(validity, 
values, validity, values));
+        BufferAllocator finalVectorsAllocator = 
allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+        VectorSchemaRoot newRoot = new VectorSchemaRoot(schema, 
finalVectorsAllocator);
+        ) {
+
+      // load it
+      VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+      vectorLoader.load(recordBatch);
+
+      NullableIntVector intDefinedVector = 
(NullableIntVector)newRoot.getVector("intDefined");
+      NullableIntVector intNullVector = 
(NullableIntVector)newRoot.getVector("intNull");
+      for (int i = 0; i < count; i++) {
+        assertFalse("#" + i, intDefinedVector.getAccessor().isNull(i));
+        assertEquals("#" + i, i, intDefinedVector.getAccessor().get(i));
+        assertTrue("#" + i, intNullVector.getAccessor().isNull(i));
+      }
+      intDefinedVector.getMutator().setSafe(count + 10, 1234);
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 1));
+      // empty slots should still default to unset
+      intDefinedVector.getMutator().setSafe(count + 1, 789);
+      assertFalse(intDefinedVector.getAccessor().isNull(count + 1));
+      assertEquals(789, intDefinedVector.getAccessor().get(count + 1));
+      assertTrue(intDefinedVector.getAccessor().isNull(count));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 2));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 3));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 4));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 5));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 6));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 7));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 8));
+      assertTrue(intDefinedVector.getAccessor().isNull(count + 9));
+      assertFalse(intDefinedVector.getAccessor().isNull(count + 10));
+      assertEquals(1234, intDefinedVector.getAccessor().get(count + 10));
+    } finally {
+      values.release();
+    }
+  }
+
   public static VectorUnloader newVectorUnloader(FieldVector root) {
     Schema schema = new Schema(root.getField().getChildren());
     int valueCount = root.getAccessor().getValueCount();

http://git-wip-us.apache.org/repos/asf/arrow/blob/33c731db/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java
index f90329a..8ed89fa 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowReaderWriter.java
@@ -30,10 +30,6 @@ import java.util.List;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
-import org.apache.arrow.vector.file.ArrowBlock;
-import org.apache.arrow.vector.file.ArrowFooter;
-import org.apache.arrow.vector.file.ArrowReader;
-import org.apache.arrow.vector.file.ArrowWriter;
 import org.apache.arrow.vector.schema.ArrowFieldNode;
 import org.apache.arrow.vector.schema.ArrowRecordBatch;
 import org.apache.arrow.vector.types.pojo.ArrowType;

Reply via email to