This is an automated email from the ASF dual-hosted git repository. ravindra pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
     new 5c61263  ARROW-5881: [Java] Provide functionalities to efficiently determine if a validity buffer has completely 1 bits/0 bits

5c61263 is described below

commit 5c612637948a682daf8d77704efca1285355156e
Author: liyafan82 <fan_li...@foxmail.com>
AuthorDate: Fri Jul 12 11:41:42 2019 +0530

    ARROW-5881: [Java] Provide functionalities to efficiently determine if a validity buffer has completely 1 bits/0 bits

    These utilities can be used to efficiently determine, for example:
    - If all values in a vector are null
    - If a vector contains no null
    - If a vector contains any valid element
    - If a vector contains any invalid element

    Author: liyafan82 <fan_li...@foxmail.com>

    Closes #4829 from liyafan82/fly_0709_nullbit and squashes the following commits:

    1762951ae <liyafan82> Merge methods and change method name
    0dc00450e <liyafan82> Do boundary check once at the beginning
    c57cb6d35 <liyafan82> Provide benchmark for allBitsNull
    3deedafdc <liyafan82> Provide functionalities to efficiently determine if a validity buffer has completely 1 bits/0 bits
---
 .../arrow/vector/BitVectorHelperBenchmarks.java | 15 ++++
 .../org/apache/arrow/vector/BitVectorHelper.java | 72 ++++++++++++++++++
 .../arrow/vector/complex/AbstractStructVector.java | 8 +-
 .../apache/arrow/vector/TestBitVectorHelper.java | 86 ++++++++++++++++++++++
 4 files changed, 175 insertions(+), 6 deletions(-)

diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java index bb48247..e7a0b20 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java +++ b/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java @@ -50,6 +50,8 @@ public class BitVectorHelperBenchmarks { private ArrowBuf validityBuffer; + private ArrowBuf oneBitValidityBuffer; + /** * Setup benchmarks. 
*/ @@ -65,6 +67,11 @@ public class BitVectorHelperBenchmarks { BitVectorHelper.setValidityBit(validityBuffer, i, (byte) 0); } } + + // only one 1 bit in the middle of the buffer + oneBitValidityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); + oneBitValidityBuffer.setZero(0, VALIDITY_BUFFER_CAPACITY / 8); + BitVectorHelper.setValidityBit(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY / 2, (byte) 1); } /** @@ -73,6 +80,7 @@ public class BitVectorHelperBenchmarks { @TearDown public void tearDown() { validityBuffer.close(); + oneBitValidityBuffer.close(); allocator.close(); } @@ -83,6 +91,13 @@ public class BitVectorHelperBenchmarks { return BitVectorHelper.getNullCount(validityBuffer, VALIDITY_BUFFER_CAPACITY); } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public boolean allBitsNullBenchmark() { + return BitVectorHelper.checkAllBitsEqualTo(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY, true); + } + //@Test public static void main(String [] args) throws RunnerException { Options opt = new OptionsBuilder() diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java index 329330e..cd16f72 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java @@ -17,6 +17,10 @@ package org.apache.arrow.vector; +import static io.netty.util.internal.PlatformDependent.getByte; +import static io.netty.util.internal.PlatformDependent.getInt; +import static io.netty.util.internal.PlatformDependent.getLong; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.DataSizeRoundingUtil; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -179,6 +183,74 @@ public class BitVectorHelper { return 8 * sizeInBytes - count; } + /** + * Tests if all bits in a validity buffer are equal 0 or 1, according to the 
specified parameter. + * @param validityBuffer the validity buffer. + * @param valueCount the bit count. + * @param checkOneBits if set to true, the method checks if all bits are equal to 1; + * otherwise, it checks if all bits are equal to 0. + * @return true if all bits are 0 or 1 according to the parameter, and false otherwise. + */ + public static boolean checkAllBitsEqualTo( + final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) { + if (valueCount == 0) { + return true; + } + final int sizeInBytes = getValidityBufferSize(valueCount); + + // boundary check + validityBuffer.checkBytes(0, sizeInBytes); + + // If value count is not a multiple of 8, then calculate number of used bits in the last byte + final int remainder = valueCount % 8; + final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1; + + // the integer number to compare against + final int intToCompare = checkOneBits ? -1 : 0; + + int index = 0; + while (index + 8 <= fullBytesCount) { + long longValue = getLong(validityBuffer.memoryAddress() + index); + if (longValue != (long) intToCompare) { + return false; + } + index += 8; + } + + while (index + 4 <= fullBytesCount) { + int intValue = getInt(validityBuffer.memoryAddress() + index); + if (intValue != intToCompare) { + return false; + } + index += 4; + } + + while (index < fullBytesCount) { + byte byteValue = getByte(validityBuffer.memoryAddress() + index); + if (byteValue != (byte) intToCompare) { + return false; + } + index += 1; + } + + // handling with the last bits + if (remainder != 0) { + byte byteValue = getByte(validityBuffer.memoryAddress() + sizeInBytes - 1); + byte mask = (byte) ((1 << remainder) - 1); + byteValue = (byte) (byteValue & mask); + if (checkOneBits) { + if ((mask & byteValue) != mask) { + return false; + } + } else { + if (byteValue != (byte) 0) { + return false; + } + } + } + return true; + } + /** Returns the byte at index from data right-shifted by offset. 
*/ public static byte getBitsFromCurrentByte(final ArrowBuf data, final int index, final int offset) { return (byte) ((data.getByte(index) & 0xFF) >>> offset); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index bbd3ff2..ba837a2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -25,6 +25,7 @@ import java.util.stream.Collectors; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.pojo.FieldType; @@ -139,12 +140,7 @@ public abstract class AbstractStructVector extends AbstractContainerVector { } private boolean nullFilled(ValueVector vector) { - for (int r = 0; r < vector.getValueCount(); r++) { - if (!vector.isNull(r)) { - return false; - } - } - return true; + return BitVectorHelper.checkAllBitsEqualTo(vector.getValidityBuffer(), vector.getValueCount(), false); } /** diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java index f62371d..9d52427 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java @@ -18,12 +18,16 @@ package org.apache.arrow.vector; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.memory.RootAllocator; import org.junit.Test; import io.netty.buffer.ArrowBuf; import 
io.netty.buffer.PooledByteBufAllocatorL; +import io.netty.util.internal.PlatformDependent; public class TestBitVectorHelper { @Test @@ -63,4 +67,86 @@ public class TestBitVectorHelper { count = BitVectorHelper.getNullCount(validityBuffer, 11); assertEquals(count, 5); } + + @Test + public void testAllBitsNull() { + final int bufferLength = 32 * 1024; + try (RootAllocator allocator = new RootAllocator(bufferLength); + ArrowBuf validityBuffer = allocator.buffer(bufferLength)) { + + validityBuffer.setZero(0, bufferLength); + int bitLength = 1024; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + bitLength = 1027; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 12, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1026; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1027; + BitVectorHelper.setValidityBit(validityBuffer, 1025, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1031; + BitVectorHelper.setValidityBit(validityBuffer, 1029, 1); + BitVectorHelper.setValidityBit(validityBuffer, 1030, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + } + } + + @Test + public void testAllBitsSet() { + final int bufferLength = 32 * 1024; + try (RootAllocator allocator = new 
RootAllocator(bufferLength); + ArrowBuf validityBuffer = allocator.buffer(bufferLength)) { + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + int bitLength = 1024; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + bitLength = 1028; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 12, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1026; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1027; + BitVectorHelper.setValidityBit(validityBuffer, 1025, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1031; + BitVectorHelper.setValidityBit(validityBuffer, 1029, 0); + BitVectorHelper.setValidityBit(validityBuffer, 1030, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + } + } }