Repository: arrow Updated Branches: refs/heads/master 4956e90a7 -> 848a0f782
ARROW-1444: [JAVA] fix last byte copy in BitVector splitAndTransfer cc @jacques-n Author: siddharth <siddha...@dremio.com> Closes #1023 from siddharthteotia/ARROW-1444 and squashes the following commits: eb5f6578 [siddharth] ARROW-1444: fix last byte copy in BitVector splitAndTransfer Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/848a0f78 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/848a0f78 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/848a0f78 Branch: refs/heads/master Commit: 848a0f782dddac788d01eb8ea230957b8372e2dd Parents: 4956e90 Author: siddharth <siddha...@dremio.com> Authored: Sat Sep 2 10:57:11 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Sat Sep 2 10:57:11 2017 -0400 ---------------------------------------------------------------------- .../java/org/apache/arrow/vector/BitVector.java | 59 +++++-- .../org/apache/arrow/vector/TestBitVector.java | 163 +++++++++++++++---- 2 files changed, 171 insertions(+), 51 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/848a0f78/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index 44001d4..8a60273 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -276,9 +276,11 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe public void splitAndTransferTo(int startIndex, int length, BitVector target) { assert startIndex + length <= valueCount; - int firstByte = getByteIndex(startIndex); - int byteSize = getSizeFromCount(length); + int firstByteSource = getByteIndex(startIndex); + int lastByteSource = getByteIndex(valueCount - 1); + int byteSizeTarget = getSizeFromCount(length); int offset = startIndex % 8; + if (length > 0) { if (offset == 0) { target.clear(); @@ -286,32 +288,59 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe if (target.data != null) { target.data.release(); } - target.data = data.slice(firstByte, byteSize); + target.data = data.slice(firstByteSource, byteSizeTarget); target.data.retain(1); - } else { + } + else { // Copy data // When the first bit starts from the middle of a byte (offset != 0), copy data from src BitVector. // Each byte in the target is composed by a part in i-th byte, another part in (i+1)-th byte. - // The last byte copied to target is a bit tricky : - // 1) if length requires partly byte (length % 8 !=0), copy the remaining bits only. - // 2) otherwise, copy the last byte in the same way as to the prior bytes. + target.clear(); - target.allocateNew(length); + target.allocateNew(byteSizeTarget * 8); + // TODO maybe do this one word at a time, rather than byte? - for (int i = 0; i < byteSize - 1; i++) { - target.data.setByte(i, (((this.data.getByte(firstByte + i) & 0xFF) >>> offset) + (this.data.getByte(firstByte + i + 1) << (8 - offset)))); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = getBitsFromCurrentByte(this.data, firstByteSource + i, offset); + byte b2 = getBitsFromNextByte(this.data, firstByteSource + i + 1, offset); + + target.data.setByte(i, (b1 + b2)); } - if (length % 8 != 0) { - target.data.setByte(byteSize - 1, ((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset)); - } else { - target.data.setByte(byteSize - 1, - (((this.data.getByte(firstByte + byteSize - 1) & 0xFF) >>> offset) + (this.data.getByte(firstByte + byteSize) << (8 - offset)))); + + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. + */ + if((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = getBitsFromCurrentByte(this.data, firstByteSource + byteSizeTarget - 1, offset); + byte b2 = getBitsFromNextByte(this.data, firstByteSource + byteSizeTarget, offset); + + target.data.setByte(byteSizeTarget - 1, b1 + b2); + } + else { + byte b1 = getBitsFromCurrentByte(this.data, firstByteSource + byteSizeTarget - 1, offset); + + target.data.setByte(byteSizeTarget - 1, b1); } } } target.getMutator().setValueCount(length); } + private static byte getBitsFromCurrentByte(ArrowBuf data, int index, int offset) { + return (byte)((data.getByte(index) & 0xFF) >>> offset); + } + + private static byte getBitsFromNextByte(ArrowBuf data, int index, int offset) { + return (byte)((data.getByte(index) << (8 - offset))); + } + private class TransferImpl implements TransferPair { BitVector to; http://git-wip-us.apache.org/repos/asf/arrow/blob/848a0f78/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java index 1631660..82e61be 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java @@ -99,47 +99,138 @@ public class TestBitVector { } } - final TransferPair transferPair = sourceVector.getTransferPair(allocator); - final BitVector toVector = (BitVector) transferPair.getTo(); - final BitVector.Accessor toAccessor = toVector.getAccessor(); - final BitVector.Mutator toMutator = toVector.getMutator(); - - /* - * form test cases such that we cover: - * - * (1) the start index is exactly where a particular byte starts in the source bit vector - * (2) the start index is randomly positioned within a byte in the source bit vector - * (2.1) the length is a multiple of 8 - * (2.2) the length is not a multiple of 8 - */ - final int[][] transferLengths = {{0, 8}, /* (1) */ - {8, 10}, /* (1) */ - {18, 0}, /* zero length scenario */ - {18, 8}, /* (2.1) */ - {26, 0}, /* zero length scenario */ - {26, 14} /* (2.2) */ - }; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing splitAndTransfer */ - for (int i = 0; i < length; i++) { - int result = toAccessor.get(i); - if ((i & 1) == 1) { - assertEquals(Integer.toString(1), Integer.toString(result)); - } else { - assertEquals(Integer.toString(0), Integer.toString(result)); + try (final BitVector toVector = new BitVector("toVector", allocator)) { + final TransferPair transferPair = sourceVector.makeTransferPair(toVector); + final BitVector.Accessor toAccessor = toVector.getAccessor(); + final BitVector.Mutator toMutator = toVector.getMutator(); + + /* + * form test cases such that we cover: + * + * (1) the start index is exactly where a particular byte starts in the source bit vector + * (2) the start index is randomly positioned within a byte in the source bit vector + * (2.1) the length is a multiple of 8 + * (2.2) the length is not a multiple of 8 + */ + final int[][] transferLengths = {{0, 8}, {8, 10}, {18, 0}, {18, 8}, {26, 0}, {26, 14}}; + + for (final int[] transferLength : transferLengths) { + final int start = transferLength[0]; + final int length = transferLength[1]; + + transferPair.splitAndTransfer(start, length); + + /* check the toVector output after doing splitAndTransfer */ + for (int i = 0; i < length; i++) { + int actual = toAccessor.get(i); + int expected = sourceAccessor.get(start + i); + assertEquals("different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i, + expected, actual); } } + } + } + } + + @Test + public void testSplitAndTransfer1() throws Exception { + + try (final BitVector sourceVector = new BitVector("bitvector", allocator)) { + final BitVector.Mutator sourceMutator = sourceVector.getMutator(); + final BitVector.Accessor sourceAccessor = sourceVector.getAccessor(); + + sourceVector.allocateNew(8190); + + /* populate the bitvector */ + for (int i = 0; i < 8190; i++) { + sourceMutator.set(i, 1); + } + + sourceMutator.setValueCount(8190); + + /* check the vector output */ + for (int i = 0; i < 8190; i++) { + int result = sourceAccessor.get(i); + assertEquals(Integer.toString(1), Integer.toString(result)); + } + + try (final BitVector toVector = new BitVector("toVector", allocator)) { + final TransferPair transferPair = sourceVector.makeTransferPair(toVector); + final BitVector.Accessor toAccessor = toVector.getAccessor(); + final BitVector.Mutator toMutator = toVector.getMutator(); + + final int[][] transferLengths = {{0, 4095}, {4095, 4095}}; + + for (final int[] transferLength : transferLengths) { + final int start = transferLength[0]; + final int length = transferLength[1]; + + transferPair.splitAndTransfer(start, length); + + /* check the toVector output after doing splitAndTransfer */ + for (int i = 0; i < length; i++) { + int actual = toAccessor.get(i); + int expected = sourceAccessor.get(start + i); + assertEquals("different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i, + expected, actual); + } + } + } + } + } + + @Test + public void testSplitAndTransfer2() throws Exception { + + try (final BitVector sourceVector = new BitVector("bitvector", allocator)) { + final BitVector.Mutator sourceMutator = sourceVector.getMutator(); + final BitVector.Accessor sourceAccessor = sourceVector.getAccessor(); + + sourceVector.allocateNew(32); + + /* populate the bitvector */ + for (int i = 0; i < 32; i++) { + if ((i & 1) == 1) { + sourceMutator.set(i, 1); + } else { + sourceMutator.set(i, 0); + } + } + + sourceMutator.setValueCount(32); - toVector.clear(); + /* check the vector output */ + for (int i = 0; i < 32; i++) { + int result = sourceAccessor.get(i); + if ((i & 1) == 1) { + assertEquals(Integer.toString(1), Integer.toString(result)); + } else { + assertEquals(Integer.toString(0), Integer.toString(result)); + } } - sourceVector.close(); + try (final BitVector toVector = new BitVector("toVector", allocator)) { + final TransferPair transferPair = sourceVector.makeTransferPair(toVector); + final BitVector.Accessor toAccessor = toVector.getAccessor(); + final BitVector.Mutator toMutator = toVector.getMutator(); + + final int[][] transferLengths = {{5,22}, {5,24}, {5,25}, {5,27}, {0,31}, {5,7}, {2,3}}; + + for (final int[] transferLength : transferLengths) { + final int start = transferLength[0]; + final int length = transferLength[1]; + + transferPair.splitAndTransfer(start, length); + + /* check the toVector output after doing splitAndTransfer */ + for (int i = 0; i < length; i++) { + int actual = toAccessor.get(i); + int expected = sourceAccessor.get(start + i); + assertEquals("different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i, + expected, actual); + } + } + } } }