Repository: arrow Updated Branches: refs/heads/master 6b8abb440 -> 6f99156c3
ARROW-287: Make nullable vectors use a BitVecor instead of UInt1Vector for bits Author: Julien Le Dem <jul...@dremio.com> Closes #134 from julienledem/bits and squashes the following commits: d4e5084 [Julien Le Dem] add nullable vector test that verifies Bit based buffers 15fde9d [Julien Le Dem] ARROW-287: Make nullable vectors use a BitVecor instead of UInt1Vector for bits Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/6f99156c Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/6f99156c Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/6f99156c Branch: refs/heads/master Commit: 6f99156c3bb01329e33f74a57d9aaff1ed8304bc Parents: 6b8abb4 Author: Julien Le Dem <jul...@dremio.com> Authored: Tue Sep 13 11:13:54 2016 -0700 Committer: Julien Le Dem <jul...@dremio.com> Committed: Tue Sep 13 11:13:54 2016 -0700 ---------------------------------------------------------------------- .../codegen/templates/NullableValueVectors.java | 6 +- .../java/org/apache/arrow/vector/BitVector.java | 16 ++++- .../apache/arrow/vector/complex/ListVector.java | 6 +- .../arrow/vector/complex/NullableMapVector.java | 8 +-- .../apache/arrow/vector/TestValueVector.java | 67 ++++++++++++++++++-- 5 files changed, 87 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/6f99156c/java/vector/src/main/codegen/templates/NullableValueVectors.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index bb2c001..486cfee 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -53,7 +53,7 @@ public final class ${className} extends BaseDataValueVector implements <#if type private final String valuesField = "$values$"; private final Field field; - final UInt1Vector bits = new UInt1Vector(bitsField, allocator); + final BitVector bits = new BitVector(bitsField, allocator); final ${valuesName} values; private final Mutator mutator; @@ -446,7 +446,7 @@ public final class ${className} extends BaseDataValueVector implements <#if type } public final class Accessor extends BaseDataValueVector.BaseAccessor <#if type.major = "VarLen">implements VariableWidthVector.VariableWidthAccessor</#if> { - final UInt1Vector.Accessor bAccessor = bits.getAccessor(); + final BitVector.Accessor bAccessor = bits.getAccessor(); final ${valuesName}.Accessor vAccessor = values.getAccessor(); /** @@ -545,7 +545,7 @@ public final class ${className} extends BaseDataValueVector implements <#if type public void set(int index, <#if type.major == "VarLen">byte[]<#elseif (type.width < 4)>int<#else>${minor.javaType!type.javaType}</#if> value) { setCount++; final ${valuesName}.Mutator valuesMutator = values.getMutator(); - final UInt1Vector.Mutator bitsMutator = bits.getMutator(); + final BitVector.Mutator bitsMutator = bits.getMutator(); <#if type.major == "VarLen"> for (int i = lastSet + 1; i < index; i++) { valuesMutator.set(i, emptyByteArray); http://git-wip-us.apache.org/repos/asf/arrow/blob/6f99156c/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index fee6e9c..c12db50 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -17,8 +17,6 @@ */ package org.apache.arrow.vector; -import io.netty.buffer.ArrowBuf; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -29,6 +27,8 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; +import io.netty.buffer.ArrowBuf; + /** * Bit implements a vector of bit-width values. Elements in the vector are accessed by position from the logical start * of the vector. The width of each element is 1 bit. The equivalent Java primitive is an int containing the value '0' @@ -435,6 +435,18 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe setValueCount(values); } + public void generateTestDataAlt(int size) { + setValueCount(size); + boolean even = true; + final int valueCount = getAccessor().getValueCount(); + for(int i = 0; i < valueCount; i++, even = !even) { + if(even){ + set(i, (byte) 1); + }else{ + set(i, (byte) 0); + } + } + } } @Override http://git-wip-us.apache.org/repos/asf/arrow/blob/6f99156c/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 2984c36..dd99c73 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -28,9 +28,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseDataValueVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; @@ -55,7 +55,7 @@ import io.netty.buffer.ArrowBuf; public class ListVector extends BaseRepeatedValueVector implements FieldVector { final UInt4Vector offsets; - final UInt1Vector bits; + final BitVector bits; private final List<BufferBacked> innerVectors; private Mutator mutator = new Mutator(); private Accessor accessor = new Accessor(); @@ -65,7 +65,7 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector { public ListVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, allocator); - this.bits = new UInt1Vector("$bits$", allocator); + this.bits = new BitVector("$bits$", allocator); this.offsets = getOffsetVector(); this.innerVectors = Collections.unmodifiableList(Arrays.<BufferBacked>asList(bits, offsets)); this.writer = new UnionListWriter(this); http://git-wip-us.apache.org/repos/asf/arrow/blob/6f99156c/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index 6b257c0..8e1bbfa 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -25,10 +25,10 @@ import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.BaseDataValueVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BufferBacked; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullableVectorDefinitionSetter; -import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.impl.NullableMapReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -45,7 +45,7 @@ public class NullableMapVector extends MapVector implements FieldVector { private final NullableMapReaderImpl reader = new NullableMapReaderImpl(this); - protected final UInt1Vector bits; + protected final BitVector bits; private final List<BufferBacked> innerVectors; @@ -54,7 +54,7 @@ public class NullableMapVector extends MapVector implements FieldVector { public NullableMapVector(String name, BufferAllocator allocator, CallBack callBack) { super(name, checkNotNull(allocator), callBack); - this.bits = new UInt1Vector("$bits$", allocator); + this.bits = new BitVector("$bits$", allocator); this.innerVectors = Collections.unmodifiableList(Arrays.<BufferBacked>asList(bits)); this.accessor = new Accessor(); this.mutator = new Mutator(); @@ -186,7 +186,7 @@ public class NullableMapVector extends MapVector implements FieldVector { return success; } public final class Accessor extends MapVector.Accessor { - final UInt1Vector.Accessor bAccessor = bits.getAccessor(); + final BitVector.Accessor bAccessor = bits.getAccessor(); @Override public Object getObject(int index) { http://git-wip-us.apache.org/repos/asf/arrow/blob/6f99156c/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 21cdc4f..124452e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -17,17 +17,23 @@ */ package org.apache.arrow.vector; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.nio.charset.Charset; +import java.util.List; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.schema.TypeLayout; import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.types.pojo.Field; import org.junit.After; import org.junit.Before; import org.junit.Test; -import java.nio.charset.Charset; - -import static org.junit.Assert.*; +import io.netty.buffer.ArrowBuf; public class TestValueVector { @@ -224,6 +230,59 @@ public class TestValueVector { } @Test + public void testNullableInt() { + // Create a new value vector for 1024 integers + try (final NullableIntVector vector = (NullableIntVector) MinorType.INT.getNewVector(EMPTY_SCHEMA_PATH, allocator, null)) { + final NullableIntVector.Mutator m = vector.getMutator(); + vector.allocateNew(1024); + + // Put and set a few values. + m.set(0, 1); + m.set(1, 2); + m.set(100, 3); + m.set(1022, 4); + m.set(1023, 5); + + m.setValueCount(1024); + + final NullableIntVector.Accessor accessor = vector.getAccessor(); + assertEquals(1, accessor.get(0)); + assertEquals(2, accessor.get(1)); + assertEquals(3, accessor.get(100)); + assertEquals(4, accessor.get(1022)); + assertEquals(5, accessor.get(1023)); + + // Ensure null values. + assertTrue(vector.getAccessor().isNull(3)); + + Field field = vector.getField(); + TypeLayout typeLayout = field.getTypeLayout(); + + List<ArrowBuf> buffers = vector.getFieldBuffers(); + + assertEquals(2, typeLayout.getVectors().size()); + assertEquals(2, buffers.size()); + + ArrowBuf validityVectorBuf = buffers.get(0); + assertEquals(128, validityVectorBuf.readableBytes()); + assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined + for (int i = 1; i < 12; i++) { + assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100 + } + assertEquals(16, validityVectorBuf.getByte(12)); // 100th bit is defined (12 * 8 + 4) + for (int i = 13; i < 127; i++) { + assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined between 100th and 1022nd + } + assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined + + vector.allocateNew(2048); + // vector has been erased + assertTrue(vector.getAccessor().isNull(0)); + } + } + + + @Test public void testBitVector() { // Create a new value vector for 1024 integers try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {