lidavidm commented on code in PR #41861:
URL: https://github.com/apache/arrow/pull/41861#discussion_r1619608574


##########
java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java:
##########
@@ -826,7 +839,120 @@ public void transferTo(BaseVariableWidthViewVector target) {
    */
   public void splitAndTransferTo(int startIndex, int length,
                                  BaseVariableWidthViewVector target) {
-    throw new UnsupportedOperationException("splitAndTransferTo function not supported!");
+    Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + 
length <= valueCount,
+        "Invalid parameters startIndex: %s, length: %s for valueCount: %s", 
startIndex, length, valueCount);
+    compareTypes(target, "splitAndTransferTo");
+    target.clear();
+    if (length > 0) {
+      splitAndTransferValidityBuffer(startIndex, length, target);
+      splitAndTransferViewBuffer(startIndex, length, target);
+      splitAndTransferDataBuffers(startIndex, length, target);
+      target.setLastSet(length - 1);
+      target.setValueCount(length);
+    }
+  }
+
+  /* allocate validity buffer */
+  private void allocateValidityBuffer(final long size) {
+    final int curSize = (int) size;
+    validityBuffer = allocator.buffer(curSize);
+    validityBuffer.readerIndex(0);
+    initValidityBuffer();
+  }
+
+  /*
+   * Transfer the validity.
+   */
+  private void splitAndTransferValidityBuffer(int startIndex, int length,
+      BaseVariableWidthViewVector target) {
+    if (length <= 0) {
+      return;
+    }
+
+    final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+    final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+    final int byteSizeTarget = getValidityBufferSizeFromCount(length);
+    final int offset = startIndex % 8;
+
+    if (offset == 0) {
+      // slice
+      if (target.validityBuffer != null) {
+        target.validityBuffer.getReferenceManager().release();
+      }
+      final ArrowBuf slicedValidityBuffer = 
validityBuffer.slice(firstByteSource, byteSizeTarget);
+      target.validityBuffer = transferBuffer(slicedValidityBuffer, 
target.allocator);
+      return;
+    }
+
+    /* Copy data
+     * When the first bit starts from the middle of a byte (offset != 0),
+     * copy data from src BitVector.
+     * Each byte in the target is composed by a part in i-th byte,
+     * another part in (i+1)-th byte.
+     */
+    target.allocateValidityBuffer(byteSizeTarget);
+
+    for (int i = 0; i < byteSizeTarget - 1; i++) {
+      byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, 
firstByteSource + i, offset);
+      byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, 
firstByteSource + i + 1, offset);
+
+      target.validityBuffer.setByte(i, (b1 + b2));
+    }
+    /* Copying the last piece is done in the following manner:
+     * if the source vector has 1 or more bytes remaining, we copy
+     * the last piece as a byte formed by shifting data
+     * from the current byte and the next byte.
+     *
+     * if the source vector has no more bytes remaining
+     * (we are at the last byte), we copy the last piece as a byte
+     * by shifting data from the current byte.
+     */
+    if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+      byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+          firstByteSource + byteSizeTarget - 1, offset);
+      byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+          firstByteSource + byteSizeTarget, offset);
+
+      target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+    } else {
+      byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+          firstByteSource + byteSizeTarget - 1, offset);
+      target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+    }
+  }
+
+  private void splitAndTransferViewBuffer(int startIndex, int length,
+      BaseVariableWidthViewVector target) {
+    final int startingByte = startIndex * ELEMENT_SIZE;
+    final int lengthInBytes = length * ELEMENT_SIZE;
+
+    if (length == 0) {
+      return;
+    }
+
+    if (target.viewBuffer != null) {
+      target.viewBuffer.getReferenceManager().release();
+    }
+
+    final ArrowBuf slicedViewBuffer = viewBuffer.slice(startingByte, 
lengthInBytes);
+    target.viewBuffer = transferBuffer(slicedViewBuffer, target.allocator);
+  }
+
+  private void splitAndTransferDataBuffers(int startIndex, int length,
+      BaseVariableWidthViewVector target) {
+    for (int i = startIndex; i < startIndex + length; i++) {
+      final int stringLength = getValueLength(i);
+      if (stringLength > INLINE_SIZE) {
+        final int bufIndex = viewBuffer.getInt(((long) i * ELEMENT_SIZE) +
+            LENGTH_WIDTH + PREFIX_WIDTH);
+        final int bufOffset = viewBuffer.getInt(((long) i * ELEMENT_SIZE) +
+            LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
+        final ArrowBuf dataBuf = dataBuffers.get(bufIndex);
+        final ArrowBuf slicedDataBuffer = dataBuf.slice(bufOffset, 
stringLength);
+        ArrowBuf currentDataBuf = 
target.allocateOrGetLastDataBuffer(stringLength);
+        currentDataBuf.setBytes(currentDataBuf.writerIndex(), 
slicedDataBuffer, 0, stringLength);
+      }

Review Comment:
   How does this work when we're not rewriting the view buffers up above?



##########
java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java:
##########
@@ -209,6 +225,39 @@ public void test() throws Exception {
     }
   }
 
+  @Test /* ViewVarCharVector */
+  public void testView() throws Exception {

Review Comment:
   Instead of the comment, just rename the test?



##########
java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java:
##########
@@ -826,7 +839,120 @@ public void transferTo(BaseVariableWidthViewVector target) {
    */
   public void splitAndTransferTo(int startIndex, int length,
                                  BaseVariableWidthViewVector target) {
-    throw new UnsupportedOperationException("splitAndTransferTo function not supported!");
+    Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + 
length <= valueCount,
+        "Invalid parameters startIndex: %s, length: %s for valueCount: %s", 
startIndex, length, valueCount);
+    compareTypes(target, "splitAndTransferTo");
+    target.clear();
+    if (length > 0) {
+      splitAndTransferValidityBuffer(startIndex, length, target);
+      splitAndTransferViewBuffer(startIndex, length, target);
+      splitAndTransferDataBuffers(startIndex, length, target);
+      target.setLastSet(length - 1);
+      target.setValueCount(length);
+    }
+  }
+
+  /* allocate validity buffer */
+  private void allocateValidityBuffer(final long size) {
+    final int curSize = (int) size;
+    validityBuffer = allocator.buffer(curSize);
+    validityBuffer.readerIndex(0);
+    initValidityBuffer();
+  }
+
+  /*
+   * Transfer the validity.
+   */
+  private void splitAndTransferValidityBuffer(int startIndex, int length,
+      BaseVariableWidthViewVector target) {
+    if (length <= 0) {
+      return;
+    }
+
+    final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+    final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+    final int byteSizeTarget = getValidityBufferSizeFromCount(length);
+    final int offset = startIndex % 8;
+
+    if (offset == 0) {
+      // slice
+      if (target.validityBuffer != null) {
+        target.validityBuffer.getReferenceManager().release();
+      }
+      final ArrowBuf slicedValidityBuffer = 
validityBuffer.slice(firstByteSource, byteSizeTarget);
+      target.validityBuffer = transferBuffer(slicedValidityBuffer, 
target.allocator);
+      return;
+    }
+
+    /* Copy data

Review Comment:
   Isn't there an existing helper for this? I don't think every vector should
   have its own copy of this code...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

Reply via email to