This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5c61263 ARROW-5881: [Java] Provide functionalities to efficiently
determine if a validity buffer has completely 1 bits/0 bits
5c61263 is described below
commit 5c612637948a682daf8d77704efca1285355156e
Author: liyafan82 <[email protected]>
AuthorDate: Fri Jul 12 11:41:42 2019 +0530
ARROW-5881: [Java] Provide functionalities to efficiently determine if a
validity buffer has completely 1 bits/0 bits
These utilities can be used to efficiently determine, for example:
- whether all values in a vector are null
- whether a vector contains no null values
- whether a vector contains any valid element
- whether a vector contains any invalid element
Author: liyafan82 <[email protected]>
Closes #4829 from liyafan82/fly_0709_nullbit and squashes the following
commits:
1762951ae <liyafan82> Merge methods and change method name
0dc00450e <liyafan82> Do boundary check once at the beginning
c57cb6d35 <liyafan82> Provide benchmark for allBitsNull
3deedafdc <liyafan82> Provide functionalities to efficiently determine if
a validity buffer has completely 1 bits/0 bits
---
.../arrow/vector/BitVectorHelperBenchmarks.java | 15 ++++
.../org/apache/arrow/vector/BitVectorHelper.java | 72 ++++++++++++++++++
.../arrow/vector/complex/AbstractStructVector.java | 8 +-
.../apache/arrow/vector/TestBitVectorHelper.java | 86 ++++++++++++++++++++++
4 files changed, 175 insertions(+), 6 deletions(-)
diff --git
a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java
b/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java
index bb48247..e7a0b20 100644
---
a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java
+++
b/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java
@@ -50,6 +50,8 @@ public class BitVectorHelperBenchmarks {
private ArrowBuf validityBuffer;
+ private ArrowBuf oneBitValidityBuffer;
+
/**
* Setup benchmarks.
*/
@@ -65,6 +67,11 @@ public class BitVectorHelperBenchmarks {
BitVectorHelper.setValidityBit(validityBuffer, i, (byte) 0);
}
}
+
+ // only one 1 bit in the middle of the buffer
+ oneBitValidityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8);
+ oneBitValidityBuffer.setZero(0, VALIDITY_BUFFER_CAPACITY / 8);
+ BitVectorHelper.setValidityBit(oneBitValidityBuffer,
VALIDITY_BUFFER_CAPACITY / 2, (byte) 1);
}
/**
@@ -73,6 +80,7 @@ public class BitVectorHelperBenchmarks {
@TearDown
public void tearDown() {
validityBuffer.close();
+ oneBitValidityBuffer.close();
allocator.close();
}
@@ -83,6 +91,13 @@ public class BitVectorHelperBenchmarks {
return BitVectorHelper.getNullCount(validityBuffer,
VALIDITY_BUFFER_CAPACITY);
}
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public boolean allBitsNullBenchmark() {
+ return BitVectorHelper.checkAllBitsEqualTo(oneBitValidityBuffer,
VALIDITY_BUFFER_CAPACITY, true);
+ }
+
//@Test
public static void main(String [] args) throws RunnerException {
Options opt = new OptionsBuilder()
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
index 329330e..cd16f72 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
@@ -17,6 +17,10 @@
package org.apache.arrow.vector;
+import static io.netty.util.internal.PlatformDependent.getByte;
+import static io.netty.util.internal.PlatformDependent.getInt;
+import static io.netty.util.internal.PlatformDependent.getLong;
+
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.DataSizeRoundingUtil;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
@@ -179,6 +183,74 @@ public class BitVectorHelper {
return 8 * sizeInBytes - count;
}
+ /**
+ * Tests if all bits in a validity buffer are equal 0 or 1, according to the
specified parameter.
+ * @param validityBuffer the validity buffer.
+ * @param valueCount the bit count.
+ * @param checkOneBits if set to true, the method checks if all bits are
equal to 1;
+ * otherwise, it checks if all bits are equal to 0.
+ * @return true if all bits are 0 or 1 according to the parameter, and false
otherwise.
+ */
+ public static boolean checkAllBitsEqualTo(
+ final ArrowBuf validityBuffer, final int valueCount, final boolean
checkOneBits) {
+ if (valueCount == 0) {
+ return true;
+ }
+ final int sizeInBytes = getValidityBufferSize(valueCount);
+
+ // boundary check
+ validityBuffer.checkBytes(0, sizeInBytes);
+
+ // If value count is not a multiple of 8, then calculate number of used
bits in the last byte
+ final int remainder = valueCount % 8;
+ final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
+
+ // the integer number to compare against
+ final int intToCompare = checkOneBits ? -1 : 0;
+
+ int index = 0;
+ while (index + 8 <= fullBytesCount) {
+ long longValue = getLong(validityBuffer.memoryAddress() + index);
+ if (longValue != (long) intToCompare) {
+ return false;
+ }
+ index += 8;
+ }
+
+ while (index + 4 <= fullBytesCount) {
+ int intValue = getInt(validityBuffer.memoryAddress() + index);
+ if (intValue != intToCompare) {
+ return false;
+ }
+ index += 4;
+ }
+
+ while (index < fullBytesCount) {
+ byte byteValue = getByte(validityBuffer.memoryAddress() + index);
+ if (byteValue != (byte) intToCompare) {
+ return false;
+ }
+ index += 1;
+ }
+
+ // handling with the last bits
+ if (remainder != 0) {
+ byte byteValue = getByte(validityBuffer.memoryAddress() + sizeInBytes -
1);
+ byte mask = (byte) ((1 << remainder) - 1);
+ byteValue = (byte) (byteValue & mask);
+ if (checkOneBits) {
+ if ((mask & byteValue) != mask) {
+ return false;
+ }
+ } else {
+ if (byteValue != (byte) 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
/** Returns the byte at index from data right-shifted by offset. */
public static byte getBitsFromCurrentByte(final ArrowBuf data, final int
index, final int offset) {
return (byte) ((data.getByte(index) & 0xFF) >>> offset);
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
index bbd3ff2..ba837a2 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
@@ -25,6 +25,7 @@ import java.util.stream.Collectors;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.pojo.FieldType;
@@ -139,12 +140,7 @@ public abstract class AbstractStructVector extends
AbstractContainerVector {
}
private boolean nullFilled(ValueVector vector) {
- for (int r = 0; r < vector.getValueCount(); r++) {
- if (!vector.isNull(r)) {
- return false;
- }
- }
- return true;
+ return BitVectorHelper.checkAllBitsEqualTo(vector.getValidityBuffer(),
vector.getValueCount(), false);
}
/**
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
index f62371d..9d52427 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
@@ -18,12 +18,16 @@
package org.apache.arrow.vector;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.RootAllocator;
import org.junit.Test;
import io.netty.buffer.ArrowBuf;
import io.netty.buffer.PooledByteBufAllocatorL;
+import io.netty.util.internal.PlatformDependent;
public class TestBitVectorHelper {
@Test
@@ -63,4 +67,86 @@ public class TestBitVectorHelper {
count = BitVectorHelper.getNullCount(validityBuffer, 11);
assertEquals(count, 5);
}
+
+ @Test
+ public void testAllBitsNull() {
+ final int bufferLength = 32 * 1024;
+ try (RootAllocator allocator = new RootAllocator(bufferLength);
+ ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {
+
+ validityBuffer.setZero(0, bufferLength);
+ int bitLength = 1024;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+
+ bitLength = 1027;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1025;
+ BitVectorHelper.setValidityBit(validityBuffer, 12, 1);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1025;
+ BitVectorHelper.setValidityBit(validityBuffer, 1024, 1);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1026;
+ BitVectorHelper.setValidityBit(validityBuffer, 1024, 1);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1027;
+ BitVectorHelper.setValidityBit(validityBuffer, 1025, 1);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1031;
+ BitVectorHelper.setValidityBit(validityBuffer, 1029, 1);
+ BitVectorHelper.setValidityBit(validityBuffer, 1030, 1);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, false));
+ }
+ }
+
+ @Test
+ public void testAllBitsSet() {
+ final int bufferLength = 32 * 1024;
+ try (RootAllocator allocator = new RootAllocator(bufferLength);
+ ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(),
bufferLength, (byte) -1);
+ int bitLength = 1024;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+
+ bitLength = 1028;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(),
bufferLength, (byte) -1);
+ bitLength = 1025;
+ BitVectorHelper.setValidityBit(validityBuffer, 12, 0);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(),
bufferLength, (byte) -1);
+ bitLength = 1025;
+ BitVectorHelper.setValidityBit(validityBuffer, 1024, 0);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(),
bufferLength, (byte) -1);
+ bitLength = 1026;
+ BitVectorHelper.setValidityBit(validityBuffer, 1024, 0);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(),
bufferLength, (byte) -1);
+ bitLength = 1027;
+ BitVectorHelper.setValidityBit(validityBuffer, 1025, 0);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(),
bufferLength, (byte) -1);
+ bitLength = 1031;
+ BitVectorHelper.setValidityBit(validityBuffer, 1029, 0);
+ BitVectorHelper.setValidityBit(validityBuffer, 1030, 0);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer,
bitLength, true));
+ }
+ }
}