vibhatha commented on code in PR #40340:
URL: https://github.com/apache/arrow/pull/40340#discussion_r1565605267


##########
java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java:
##########
@@ -0,0 +1,1434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static 
org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BaseVariableWidthViewVector is a base class providing functionality for 
strings/bytes types in view format.
+ *
+ */
+public abstract class BaseVariableWidthViewVector extends BaseValueVector 
implements VariableWidthFieldVector {
+  // A single element of a view comprises 16 bytes
+  protected static final int ELEMENT_SIZE = 16;
+  public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096;
+  private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION 
* ELEMENT_SIZE;
+  private static final int MAX_BUFFER_SIZE = (int) 
Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE);
+  private int lastValueCapacity;
+  private long lastValueAllocationSizeInBytes;
+
+  /*
+  * Variable Width View Vector comprises the following format
+  *
+  * Short strings, length <= 12
+  * | Bytes 0-3  | Bytes 4-15                            |
+  * |------------|---------------------------------------|
+  * | length     | data (padded with 0)                  |
+  * |------------|---------------------------------------|
+  *
+  * Long strings, length > 12
+  * | Bytes 0-3  | Bytes 4-7  | Bytes 8-11 | Bytes 12-15 |
+  * |------------|------------|------------|-------------|
+  * | length     | prefix     | buf.index  | offset      |
+  * |------------|------------|------------|-------------|
+  *
+  * */
+  // Maximum length, in bytes, of a value that is stored inline in its view
+  protected static final int INLINE_SIZE = 12;
+  // The first 4 bytes of view are allocated for length
+  protected static final int LENGTH_WIDTH = 4;
+  // The second 4 bytes of view are allocated for prefix width
+  protected static final int PREFIX_WIDTH = 4;
+  // The third 4 bytes of view are allocated for buffer index
+  protected static final int BUF_INDEX_WIDTH = 4;
+  protected static final byte[] EMPTY_BYTE_ARRAY = new byte[]{};
+  protected ArrowBuf validityBuffer;
+  // The view buffer is used to store the variable width view elements
+  protected ArrowBuf viewBuffer;
+  // The external buffer which stores the long strings
+  protected List<ArrowBuf> dataBuffers;
+  protected int initialDataBufferSize;
+  protected int valueCount;
+  protected int lastSet;
+  protected final Field field;
+
+
+  /**
+   * Constructs a new, empty instance. The validity and view buffers start out as
+   * the allocator's empty buffer and the list of data buffers starts out empty.
+   *
+   * @param field The field materialized by this vector
+   * @param allocator The allocator to use for creating/resizing buffers
+   */
+  public BaseVariableWidthViewVector(Field field, final BufferAllocator allocator) {
+    super(allocator);
+    this.field = field;
+    // buffers
+    this.validityBuffer = allocator.getEmpty();
+    this.viewBuffer = allocator.getEmpty();
+    this.dataBuffers = new ArrayList<>();
+    // sizing defaults used by the first allocation
+    this.lastValueCapacity = INITIAL_VIEW_VALUE_ALLOCATION;
+    this.lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
+    // nothing has been written yet
+    this.lastSet = -1;
+    this.valueCount = 0;
+  }
+
+  /**
+   * Get the name of this vector, taken from the field it materializes.
+   *
+   * @return the name of the underlying field
+   */
+  @Override
+  public String getName() {
+    return this.field.getName();
+  }
+
+  /* TODO:
+   * see if getNullCount() can be made faster -- O(1)
+   */
+
+  /* TODO:
+   * Once the entire hierarchy has been refactored, move common functions
+   * like getNullCount(), splitAndTransferValidityBuffer to top level
+   * base class BaseValueVector.
+   *
+   * Along with this, some class members (validityBuffer) can also be
+   * abstracted out to top level base class.
+   *
+   * Right now BaseValueVector is the top level base class for other
+   * vector types in ValueVector hierarchy (non-nullable) and those
+   * vectors have not yet been refactored/removed so moving things to
+   * the top class as of now is not a good idea.
+   */
+
+  /* TODO:
+   * Implement TransferPair functionality
+   * https://github.com/apache/arrow/issues/40932
+   *
+   */
+
+  /**
+   * Get the buffer that manages the validity (NULL or NON-NULL nature) of
+   * elements in the vector. Each element is tracked by a single bit of this
+   * buffer.
+   *
+   * @return the validity buffer
+   */
+  @Override
+  public ArrowBuf getValidityBuffer() {
+    return validityBuffer;
+  }
+
+  /**
+   * Get the view buffer, which holds one fixed-width view per element.
+   * The additional data buffers that back long values are not returned here.
+   *
+   * @return the view buffer
+   */
+  @Override
+  public ArrowBuf getDataBuffer() {
+    return viewBuffer;
+  }
+
+  /**
+   * BaseVariableWidthViewVector doesn't support offset buffer.
+   *
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public ArrowBuf getOffsetBuffer() {
+    throw new UnsupportedOperationException("Offset buffer is not supported in 
BaseVariableWidthViewVector");
+  }
+
+  /**
+   * BaseVariableWidthViewVector doesn't support offset buffer, so there is no
+   * offset buffer address to report.
+   *
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public long getOffsetBufferAddress() {
+    throw new UnsupportedOperationException("Offset buffer is not supported in 
BaseVariableWidthViewVector");
+  }
+
+  /**
+   * Get the memory address of the buffer that manages the validity
+   * (NULL or NON-NULL nature) of elements in the vector.
+   *
+   * @return memory address reported by the validity buffer
+   */
+  @Override
+  public long getValidityBufferAddress() {
+    return validityBuffer.memoryAddress();
+  }
+
+  /**
+   * Get the memory address of the view buffer, i.e. the buffer returned by
+   * {@link #getDataBuffer()}.
+   *
+   * @return memory address reported by the view buffer
+   */
+  @Override
+  public long getDataBufferAddress() {
+    return viewBuffer.memoryAddress();
+  }
+
+  /**
+   * Record the desired value capacity to be used by the next allocation. Only
+   * bookkeeping fields are updated; no memory is allocated here.
+   *
+   * @param valueCount desired number of elements in the vector
+   * @throws OversizedAllocationException if the view buffer needed for
+   *         {@code valueCount} elements is larger than the maximum buffer size
+   */
+  @Override
+  public void setInitialCapacity(int valueCount) {
+    // widen before multiplying so the byte count itself cannot overflow an int
+    final long viewBytes = (long) valueCount * ELEMENT_SIZE;
+    checkDataBufferSize(viewBytes);
+    lastValueCapacity = valueCount;
+    lastValueAllocationSizeInBytes = (int) viewBytes;
+  }
+
+  /**
+   * Sets the desired value capacity for the vector. This function doesn't
+   * allocate any memory for the vector.
+   *
+   * <p>The requested size is validated before any field is updated, so a
+   * rejected request leaves the previously configured capacity untouched.
+   *
+   * @param valueCount desired number of elements in the vector
+   * @param density average number of bytes per variable width view element
+   * @throws OversizedAllocationException if the view buffer needed for
+   *         {@code valueCount} elements is larger than the maximum buffer size
+   */
+  @Override
+  public void setInitialCapacity(int valueCount, double density) {
+    // widen before multiplying so the byte count itself cannot overflow an int
+    final long size = (long) valueCount * ELEMENT_SIZE;
+    // validate first: nothing below may run for a request that is rejected
+    checkDataBufferSize(size);
+    initialDataBufferSize = (int) (valueCount * density);
+    lastValueAllocationSizeInBytes = size;
+    lastValueCapacity = valueCount;
+  }
+
+  /**
+   * Get the density of this vector: the total length reported by
+   * {@code getTotalLengthUptoIndex(valueCount)} divided by the value count.
+   *
+   * @return density, or 0 when the vector holds no values
+   */
+  public double getDensity() {
+    if (valueCount == 0) {
+      return 0.0D;
+    }
+    final double totalListSize = getTotalLengthUptoIndex(valueCount);
+    return totalListSize / valueCount;
+  }
+
+  /**
+   * Get the number of elements the vector can currently hold. The result is
+   * limited by whichever is smaller: the number of views that fit in the view
+   * buffer or the number of bits available in the validity buffer.
+   *
+   * @return number of elements that vector can hold.
+   */
+  @Override
+  public int getValueCapacity() {
+    final int validitySlots = getValidityBufferValueCapacity();
+    final long viewSlots = viewBuffer.capacity() / ELEMENT_SIZE;
+    final int cappedViewSlots = Math.max(capAtMaxInt(viewSlots), 0);
+    return Math.min(cappedViewSlots, validitySlots);
+  }
+
+  private int getValidityBufferValueCapacity() {
+    return capAtMaxInt(validityBuffer.capacity() * 8);
+  }
+
+  /**
+   * Zero out the validity buffer and the view buffer, then release every data
+   * buffer and empty the data buffer list.
+   */
+  public void zeroVector() {
+    // validity bits first, then the views themselves
+    validityBuffer.setZero(0, validityBuffer.capacity());
+    viewBuffer.setZero(0, viewBuffer.capacity());
+    // long values live in the data buffers, which are dropped entirely
+    clearDataBuffers();
+  }
+
+  /* zero out the whole validity buffer (its full capacity), marking every element as null */
+  private void initValidityBuffer() {
+    validityBuffer.setZero(0, validityBuffer.capacity());
+  }
+
+  /**
+   * Reset the vector to initial state: calls {@link #zeroVector()} and then
+   * resets the last-set index and the value count.
+   * Note that the validity and view buffers are zeroed but kept, whereas the
+   * data buffers are released by {@link #zeroVector()}.
+   */
+  @Override
+  public void reset() {
+    zeroVector();
+    lastSet = -1;
+    valueCount = 0;
+  }
+
+  /**
+   * Close the vector and release the associated buffers.
+   * Delegates to {@link #clear()}.
+   */
+  @Override
+  public void close() {
+    clear();
+  }
+
+  /**
+   * Release the validity buffer, the view buffer and all data buffers, and
+   * reset the last-set index and the value count.
+   * Same as {@link #close()}.
+   */
+  @Override
+  public void clear() {
+    validityBuffer = releaseBuffer(validityBuffer);
+    viewBuffer = releaseBuffer(viewBuffer);
+    clearDataBuffers();
+    lastSet = -1;
+    valueCount = 0;
+  }
+
+  /**
+   * Release every data buffer held by this vector, then empty the list that
+   * tracks them.
+   */
+  public void clearDataBuffers() {
+    dataBuffers.forEach(this::releaseBuffer);
+    dataBuffers.clear();
+  }
+
+  /**
+   * Get the inner vectors. Not supported by this vector.
+   *
+   * @deprecated This API will be removed as the current implementations no
+   *     longer support inner vectors. Use {@link #getFieldBuffers()} instead.
+   *
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Deprecated
+  @Override
+  public List<BufferBacked> getFieldInnerVectors() {
+    throw new UnsupportedOperationException("There are no inner vectors. Use 
getFieldBuffers");
+  }
+
+  /**
+   * Initialize the children in schema for this Field. This vector has no
+   * children, so the only accepted input is an empty list, for which the call
+   * is a NO-OP.
+   *
+   * @param children the schema
+   * @throws IllegalArgumentException if children is a non-empty list
+   */
+  @Override
+  public void initializeChildrenFromFields(List<Field> children) {
+    if (!children.isEmpty()) {
+      throw new IllegalArgumentException("primitive type vector cannot have 
children");
+    }
+  }
+
+  /**
+   * Get the inner child vectors. This vector has none.
+   *
+   * @return always an empty list
+   */
+  @Override
+  public List<FieldVector> getChildrenFromFields() {
+    return Collections.emptyList();
+  }
+
+
+  /**
+   * Load the buffers of this vector with provided source buffers.
+   * Not implemented yet for view vectors: the method currently always throws
+   * (see the TODO in the body for the tracking issue).
+   *
+   * @param fieldNode  the fieldNode indicating the value count
+   * @param ownBuffers the buffers for this Field (own buffers only, children
+   *                   not included)
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> 
ownBuffers) {
+    // TODO: https://github.com/apache/arrow/issues/40931
+    throw new UnsupportedOperationException("loadFieldBuffers is not supported 
for BaseVariableWidthViewVector");
+  }
+
+  /**
+   * Get the buffers belonging to this vector, in the order: validity buffer,
+   * view buffer, then every data buffer.
+   *
+   * @return the inner buffers.
+   */
+  @Override
+  public List<ArrowBuf> getFieldBuffers() {
+    // before flight/IPC, bring the vector to a consistent state: fill any holes
+    // up to the value count and position the reader/writer indexes.
+    fillHoles(valueCount);
+    setReaderAndWriterIndex();
+
+    final List<ArrowBuf> fieldBuffers = new ArrayList<>(2 + dataBuffers.size());
+    fieldBuffers.add(validityBuffer);
+    fieldBuffers.add(viewBuffer);
+    fieldBuffers.addAll(dataBuffers);
+    return fieldBuffers;
+  }
+
+  /**
+   * Set the reader and writer indexes for the inner buffers.
+   */
+  private void setReaderAndWriterIndex() {
+    validityBuffer.readerIndex(0);
+    viewBuffer.readerIndex(0);
+    if (valueCount == 0) {
+      validityBuffer.writerIndex(0);
+      viewBuffer.writerIndex(0);
+    } else {
+      validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+      viewBuffer.writerIndex(valueCount * ELEMENT_SIZE);
+    }
+  }
+
+  /**
+   * Allocate memory for the vector using the most recently recorded allocation
+   * size and value capacity. Performs the same allocation as
+   * {@link #allocateNewSafe()}, but lets an allocation failure propagate as an
+   * exception instead of returning false.
+   */
+  @Override
+  public void allocateNew() {
+    allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+  }
+
+  /**
+   * Allocate memory for the vector using the most recently recorded allocation
+   * size and value capacity (initially enough for 4096 elements). See
+   * {@link #allocateNew(long, int)} for allocating memory for a specific
+   * number of elements in the vector.
+   *
+   * <p>Any exception raised by the allocation is caught and reported through
+   * the return value.
+   *
+   * @return false if memory allocation fails, true otherwise.
+   */
+  @Override
+  public boolean allocateNewSafe() {
+    try {
+      allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+      return true;
+    } catch (Exception e) {
+      return false;
+    }
+  }
+
+  /**
+   * Allocate memory for the vector to support storing at least the provided
+   * number of elements in the vector. This method must be called prior to
+   * using the ValueVector. Buffers currently held by the vector are released
+   * first; if the allocation fails part-way, the vector is cleared again before
+   * the exception is rethrown.
+   *
+   * @param totalBytes desired size of the view buffer in bytes
+   * @param valueCount the desired number of elements in the vector
+   * @throws OversizedAllocationException if totalBytes is negative or larger
+   *         than the maximum buffer size
+   * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation
+   *         fails
+   */
+  @Override
+  public void allocateNew(long totalBytes, int valueCount) {
+    assert totalBytes >= 0;
+
+    checkDataBufferSize(totalBytes);
+
+    /* we are doing a new allocation -- release the current buffers */
+    clear();
+
+    try {
+      allocateBytes(totalBytes, valueCount);
+    } catch (Exception e) {
+      clear();
+      throw e;
+    }
+  }
+
+  /**
+   * Allocate memory for the given number of elements, sizing the view buffer
+   * from the most recently recorded allocation size.
+   * NOTE(review): the view buffer size is not derived from {@code valueCount}
+   * here -- confirm this is intended.
+   *
+   * @param valueCount the desired number of elements in the vector
+   */
+  @Override
+  public void allocateNew(int valueCount) {
+    final long viewBufferBytes = lastValueAllocationSizeInBytes;
+    allocateNew(viewBufferBytes, valueCount);
+  }
+
+  /*
+   * Check if the requested buffer size is within bounds: throws
+   * OversizedAllocationException when the size is negative or larger than
+   * MAX_BUFFER_SIZE, otherwise does nothing.
+   */
+  private void checkDataBufferSize(long size) {
+    if (size > MAX_BUFFER_SIZE || size < 0) {
+      throw new OversizedAllocationException("Memory required for vector " +
+          "is (" + size + "), which is overflow or more than max allowed (" + 
MAX_BUFFER_SIZE + "). " +
+          "You could consider using LargeVarCharVector/LargeVarBinaryVector 
for large strings/large bytes types");
+    }
+  }
+
+  /*
+   * Allocate the view buffer (valueBufferSize bytes) and a zeroed validity
+   * buffer with one bit per value, then record the resulting capacity and
+   * allocation size for later allocations.
+   */
+  private void allocateBytes(final long valueBufferSize, final int valueCount) {
+    /* allocate view buffer */
+    viewBuffer = allocator.buffer(valueBufferSize);
+    viewBuffer.readerIndex(0);
+
+    // one bit per value, rounded up to whole bytes; computed in long arithmetic
+    // so that a value count close to Integer.MAX_VALUE cannot wrap negative
+    validityBuffer = allocator.buffer((valueCount + 7L) / 8);
+    initValidityBuffer();
+
+    lastValueCapacity = getValueCapacity();
+    lastValueAllocationSizeInBytes = capAtMaxInt(viewBuffer.capacity());
+  }
+
+  /**
+   * Resize the vector to increase the capacity. Reallocates, in this order,
+   * the view buffer, the data buffer and the validity buffer; the view and
+   * validity buffers are doubled in size.
+   */
+  @Override
+  public void reAlloc() {
+    reallocViewBuffer();
+    reallocViewDataBuffer();
+    reallocValidityBuffer();
+  }
+
+  /**
+   * Grow the view buffer, which stores the views for VIEWVARCHAR or
+   * VIEWVARBINARY elements in the vector, to twice its current capacity. When
+   * the buffer is currently empty, the last recorded allocation size is used
+   * instead, falling back to twice the initial byte count.
+   *
+   * @throws OversizedAllocationException if the desired new size is more than
+   *                                      max allowed
+   * @throws OutOfMemoryException if the internal memory allocation fails
+   */
+  public void reallocViewBuffer() {
+    long targetSize = viewBuffer.capacity() * 2;
+    if (targetSize == 0) {
+      // nothing allocated yet: doubling zero would not grow anything
+      targetSize = lastValueAllocationSizeInBytes > 0 ?
+          lastValueAllocationSizeInBytes :
+          INITIAL_BYTE_COUNT * 2L;
+    }
+    reallocViewBuffer(targetSize);
+  }
+
+  /**
+   * Request more room for long values: asks for twice the capacity of the most
+   * recently added data buffer. When there is no data buffer (or it has zero
+   * capacity), the last recorded allocation size is requested instead, falling
+   * back to twice the initial byte count.
+   */
+  public void reallocViewDataBuffer() {
+    final long lastBufferCapacity = dataBuffers.isEmpty() ?
+        0 :
+        dataBuffers.get(dataBuffers.size() - 1).capacity();
+
+    long targetSize = lastBufferCapacity * 2;
+    if (targetSize == 0) {
+      targetSize = lastValueAllocationSizeInBytes > 0 ?
+          lastValueAllocationSizeInBytes :
+          INITIAL_BYTE_COUNT * 2L;
+    }
+    reallocViewDataBuffer(targetSize);
+  }
+
+  /**
+   * Reallocate the view buffer to given size. View Buffer stores the views for
+   * VIEWVARCHAR or VIEWVARBINARY elements in the vector. The actual allocated
+   * size may be larger than the requested one because the provided value is
+   * rounded up to the next power of two and then to a multiple of 16 bytes.
+   * The existing content is copied into the new buffer and the old buffer is
+   * released. A desired size of 0 is a no-op.
+   *
+   * @param desiredAllocSize the desired new allocation size
+   * @throws OversizedAllocationException if the desired new size is more than
+   *                                      max allowed
+   * @throws OutOfMemoryException if the internal memory allocation fails
+   */
+  public void reallocViewBuffer(long desiredAllocSize) {
+    if (desiredAllocSize == 0) {
+      return;
+    }
+    long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
+    assert newAllocationSize >= 1;
+
+    checkDataBufferSize(newAllocationSize);
+    // for each set operation, we have to allocate 16 bytes
+    // here we are adjusting the desired allocation-based allocation size
+    // to align with the 16bytes requirement.
+    newAllocationSize = roundUpToMultipleOf16(newAllocationSize);
+
+    final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+    newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity());
+
+    viewBuffer.getReferenceManager().release();
+    viewBuffer = newBuf;
+    lastValueAllocationSizeInBytes = viewBuffer.capacity();
+  }
+
+  /**
+   * Make room for more long-value data. Existing data buffers are never copied
+   * or resized: when the most recent data buffer cannot take
+   * {@code desiredAllocSize} more bytes, a new buffer (size rounded up to the
+   * next power of two) is appended to the list. Nothing happens when the
+   * desired size is 0 or when there is no data buffer yet.
+   *
+   * @param desiredAllocSize allocation size in bytes
+   * @throws OversizedAllocationException if the rounded size is more than max
+   *                                      allowed
+   */
+  public void reallocViewDataBuffer(long desiredAllocSize) {
+    if (desiredAllocSize == 0 || dataBuffers.isEmpty()) {
+      return;
+    }
+
+    final ArrowBuf lastBuf = dataBuffers.get(dataBuffers.size() - 1);
+    final long remainingBytes = lastBuf.capacity() - lastBuf.writerIndex();
+    if (remainingBytes >= desiredAllocSize) {
+      // the current buffer still has enough room
+      return;
+    }
+
+    final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
+    assert newAllocationSize >= 1;
+    checkDataBufferSize(newAllocationSize);
+
+    dataBuffers.add(allocator.buffer(newAllocationSize));
+  }
+
+  /**
+   * Grow the validity buffer to hold twice as many bits as it does now. When
+   * the buffer is currently empty, the last recorded value capacity is used
+   * instead, falling back to twice the initial value allocation. Existing bits
+   * are copied over and the newly added tail is zeroed.
+   */
+  public void reallocValidityBuffer() {
+    final long oldSizeInBytes = validityBuffer.capacity();
+
+    int targetValidityCount = capAtMaxInt((oldSizeInBytes * 8) * 2);
+    if (targetValidityCount == 0) {
+      targetValidityCount = lastValueCapacity > 0 ?
+          lastValueCapacity :
+          2 * INITIAL_VALUE_ALLOCATION;
+    }
+
+    final ArrowBuf grown = allocator.buffer(computeValidityBufferSize(targetValidityCount));
+    grown.setBytes(0, validityBuffer, 0, oldSizeInBytes);
+    grown.setZero(oldSizeInBytes, grown.capacity() - oldSizeInBytes);
+
+    validityBuffer.getReferenceManager().release();
+    validityBuffer = grown;
+
+    lastValueCapacity = getValueCapacity();
+  }
+
+  /*
+   * Number of bytes needed to hold one validity bit per value, rounded up to
+   * whole bytes. The addition is done in long arithmetic: with int arithmetic
+   * a value count above Integer.MAX_VALUE - 7 would wrap to a negative size.
+   */
+  private long computeValidityBufferSize(int valueCount) {
+    return ((long) valueCount + 7) / 8;
+  }
+
+  /**
+   * Get the size (number of bytes) of underlying view buffer, capped at the
+   * maximum int value.
+   *
+   * @return number of bytes in the view buffer
+   */
+  @Override
+  public int getByteCapacity() {
+    return capAtMaxInt(viewBuffer.capacity());
+  }
+
+  /**
+   * Get the combined length of all values, obtained by adding up
+   * {@code getLength(i)} for every index below the value count.
+   *
+   * @return sum of the value lengths, 0 for an empty vector
+   */
+  @Override
+  public int sizeOfValueBuffer() {
+    int lengthSum = 0;
+    // an empty vector never enters the loop and so reports 0
+    for (int idx = 0; idx < valueCount; idx++) {
+      lengthSum += getLength(idx);
+    }
+    return lengthSum;
+  }
+
+  /**
+   * Get the size (number of bytes) of underlying buffers used by this
+   * vector for its current value count.
+   *
+   * @return size of underlying buffers.
+   */
+  @Override
+  public int getBufferSize() {
+    return getBufferSizeFor(this.valueCount);
+  }
+
+  /**
+   * Get the potential buffer size for a particular number of records. The
+   * validity and view buffer portions are derived from {@code valueCount}; the
+   * data buffer portion is the number of bytes currently written to the data
+   * buffers, whatever {@code valueCount} is.
+   *
+   * @param valueCount desired number of elements in the vector
+   * @return estimated size of underlying buffers if the vector holds
+   *         a given number of elements, 0 if {@code valueCount} is 0
+   */
+  @Override
+  public int getBufferSizeFor(final int valueCount) {
+    if (valueCount == 0) {
+      return 0;
+    }
+
+    final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+    final int viewBufferSize = valueCount * ELEMENT_SIZE;
+    final int dataBufferSize = getDataBufferSize();
+    return validityBufferSize + viewBufferSize + dataBufferSize;
+  }
+
+  /* total number of bytes written so far across all data buffers (sum of their writer indexes) */
+  private int getDataBufferSize() {
+    return dataBuffers.stream()
+        .mapToInt(dataBuf -> (int) dataBuf.writerIndex())
+        .sum();
+  }
+
+  /**
+   * Get information about how this field is materialized.
+   *
+   * @return the field this vector was constructed with
+   */
+  @Override
+  public Field getField() {
+    return field;
+  }
+
+  /**
+   * Return the underlying buffers associated with this vector. Note that this
+   * doesn't impact the reference counts for this buffer, so it only should be
+   * used for in-context access. Also note that this buffer changes regularly,
+   * thus external classes shouldn't hold a reference to it (unless they change
+   * it).
+   * <p>
+   * Note: This method only returns the validity buffer and the view buffer.
+   * It doesn't return the data buffers. An empty array is returned when
+   * {@link #getBufferSize()} is 0.
+   * <p>
+   * TODO: Implement a strategy to retrieve the data buffers.
+   * <a href="https://github.com/apache/arrow/issues/40930">data buffer
+   * retrieval.</a>
+   *
+   * @param clear Whether to clear vector before returning, the buffers will
+   *              still be refcounted but the returned array will be the only
+   *              reference to them
+   * @return The underlying {@link ArrowBuf buffers} that is used by this
+   *         vector instance.
+   */
+  @Override
+  public ArrowBuf[] getBuffers(boolean clear) {
+    final ArrowBuf[] buffers;
+    setReaderAndWriterIndex();
+    if (getBufferSize() == 0) {
+      buffers = new ArrowBuf[0];
+    } else {
+      buffers = new ArrowBuf[2];
+      buffers[0] = validityBuffer;
+      buffers[1] = viewBuffer;
+    }
+    if (clear) {
+      for (final ArrowBuf buffer : buffers) {
+        buffer.getReferenceManager().retain();
+      }
+      clear();
+    }
+    return buffers;
+  }
+
+  /**
+   * Validate the scalar values held by this vector. This base implementation
+   * performs no validation.
+   */
+  public void validateScalars() {
+    // No validation by default.
+  }
+
+  /**
+   * Construct a transfer pair of this vector and another vector of the same
+   * type. Delegates to {@link #getTransferPair(Field, BufferAllocator)}.
+   *
+   * @param field The field materialized by this vector.
+   * @param allocator allocator for the target vector
+   * @param callBack not used
+   * @return TransferPair
+   */
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator, 
CallBack callBack) {
+    return getTransferPair(field, allocator);
+  }
+
+  /**
+   * Construct a transfer pair of this vector and another vector of the same
+   * type. Delegates to {@link #getTransferPair(String, BufferAllocator)}.
+   *
+   * @param ref name of the target vector
+   * @param allocator allocator for the target vector
+   * @param callBack not used
+   * @return TransferPair
+   */
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator, 
CallBack callBack) {
+    return getTransferPair(ref, allocator);
+  }
+
+  /**
+   * Construct a transfer pair of this vector and another vector of the same
+   * type. The target vector gets the same name as this vector.
+   *
+   * @param allocator allocator for the target vector
+   * @return TransferPair
+   */
+  @Override
+  public TransferPair getTransferPair(BufferAllocator allocator) {
+    return getTransferPair(getName(), allocator);
+  }
+
+  /**
+   * Construct a transfer pair of this vector and another vector of the same
+   * type. To be implemented by the concrete vector classes.
+   *
+   * @param ref name of the target vector
+   * @param allocator allocator for the target vector
+   * @return TransferPair
+   */
+  @Override
+  public abstract TransferPair getTransferPair(String ref, BufferAllocator 
allocator);
+
+  /**
+   * Construct a transfer pair of this vector and another vector of the same
+   * type. To be implemented by the concrete vector classes.
+   *
+   * @param field The field materialized by this vector.
+   * @param allocator allocator for the target vector
+   * @return TransferPair
+   */
+  @Override
+  public abstract TransferPair getTransferPair(Field field, BufferAllocator 
allocator);
+
+  /**
+   * Transfer this vector's data to another vector.
+   * Not implemented yet for view vectors: the method currently always throws.
+   *
+   * @param target destination vector for transfer
+   * @throws UnsupportedOperationException always
+   */
+  public void transferTo(BaseVariableWidthViewVector target) {
+    throw new UnsupportedOperationException("trasferTo function not 
supported!");
+  }
+
+  /**
+   * Slice this vector at desired index and length and transfer the
+   * corresponding data to the target vector.
+   * Not implemented yet for view vectors: the method currently always throws.
+   *
+   * @param startIndex start position of the split in source vector.
+   * @param length length of the split.
+   * @param target destination vector
+   * @throws UnsupportedOperationException always
+   */
+  public void splitAndTransferTo(int startIndex, int length,
+                                 BaseVariableWidthViewVector target) {
+    throw new UnsupportedOperationException("splitAndTransferTo function not 
supported!");
+  }
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |                common getters and setters                      |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  /**
+   * Get the number of elements that are null in the vector, counted over the
+   * first {@code valueCount} bits of the validity buffer.
+   *
+   * @return the number of null elements.
+   */
+  @Override
+  public int getNullCount() {
+    return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+  }
+
+  /**
+   * Check if the given index is within the current value capacity
+   * of the vector. Only the upper bound is checked.
+   *
+   * @param index  position to check
+   * @return true if the index is below the current value capacity
+   */
+  public boolean isSafe(int index) {
+    return index < getValueCapacity();
+  }
+
+  /**
+   * Check if an element at given index is null, i.e. its validity bit is 0.
+   *
+   * @param index  position of an element
+   * @return true if an element at given index is null
+   */
+  @Override
+  public boolean isNull(int index) {
+    return (isSet(index) == 0);
+  }
+
+  /**
+   * Read the validity bit of the element at the given index. This is the
+   * inverse of {@link #isNull(int)}, expressed as an int.
+   *
+   * @param index  position of an element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  public int isSet(int index) {
+    final int byteIndex = index >> 3;
+    final byte b = validityBuffer.getByte(byteIndex);
+    final int bitIndex = index & 7;
+    return (b >> bitIndex) & 0x01;
+  }
+
+  /**
+   * Get the value count of vector. This will always be zero unless
+   * {@link #setValueCount(int)} has been called prior to calling this.
+   *
+   * @return valueCount for the vector
+   */
+  @Override
+  public int getValueCount() {
+    return valueCount;
+  }
+
+  /**
+   * Sets the value count for the vector. The view and validity buffers are
+   * grown until they can hold that many values, holes up to the new count are
+   * filled, the last-set index becomes {@code valueCount - 1} and the reader
+   * and writer indexes are updated.
+   *
+   * @param valueCount   value count
+   */
+  @Override
+  public void setValueCount(int valueCount) {
+    assert valueCount >= 0;
+    this.valueCount = valueCount;
+    while (valueCount > getValueCapacity()) {
+      reallocViewBuffer();
+      reallocValidityBuffer();
+    }
+    fillHoles(valueCount);
+    lastSet = valueCount - 1;
+    setReaderAndWriterIndex();
+  }
+
+  /**
+   * Create holes in the vector up to the given index (exclusive).
+   * Holes will be created from the current last-set position in
+   * the vector, and the last-set position then becomes {@code index - 1}.
+   *
+   * @param index target index
+   */
+  @Override
+  public void fillEmpties(int index) {
+    handleSafe(index, EMPTY_BYTE_ARRAY.length);
+    fillHoles(index);
+    lastSet = index - 1;
+  }
+
+  /**
+   * Set the index of the last non-null element in the vector.
+   * It is important to call this method with appropriate value
+   * before calling {@link #setValueCount(int)}. The value is stored as given,
+   * without validation.
+   *
+   * @param value desired index of last non-null element.
+   */
+  @Override
+  public void setLastSet(int value) {
+    lastSet = value;
+  }
+
+  /**
+   * Get the index of the last non-null element in the vector.
+   *
+   * @return index of the last non-null element, -1 if nothing has been set
+   */
+  @Override
+  public int getLastSet() {
+    return lastSet;
+  }
+
+  /**
+   * Mark the particular position in the vector as non-null, growing the
+   * vector first when the position is beyond the current value capacity.
+   *
+   * @param index position of the element.
+   */
+  @Override
+  public void setIndexDefined(int index) {
+    // getValueCapacity() is limited by both the view buffer and the validity
+    // buffer, so both have to grow (as setValueCount does). Growing only the
+    // validity buffer would loop forever whenever the view buffer is the
+    // smaller of the two, e.g. on a vector that has not been allocated yet.
+    while (index >= getValueCapacity()) {
+      reallocViewBuffer();
+      reallocValidityBuffer();
+    }
+    BitVectorHelper.setBit(validityBuffer, index);
+  }
+
+  /**
+   * Sets the value length for an element. Makes sure the vector can take an
+   * element of the given length at the given index, fills holes up to the
+   * index and records the index as the last one set.
+   *
+   * @param index   position of the element to set
+   * @param length  length of the element
+   */
+  @Override
+  public void setValueLengthSafe(int index, int length) {
+    assert index >= 0;
+    handleSafe(index, length);
+    fillHoles(index);
+    lastSet = index;
+  }
+
+  /**
+   * Get the length of the variable length element at the specified index.
+   *
+   * @param index position of an element to get
+   * @return the length of the element, 0 if the element is null
+   */
+  @Override
+  public int getValueLength(int index) {
+    assert index >= 0;
+    if (isSet(index) == 0) {
+      return 0;
+    }
+    return getLength(index);
+  }
+
+  /**
+   * Set the variable length element at the specified index to the supplied
+   * byte array. This is same as using {@link #set(int, byte[], int, int)}
+   * with start as zero and length as {@code value.length}. No capacity check
+   * is made; see {@link #setSafe(int, byte[])} for the checked variant.
+   *
+   * @param index   position of the element to set
+   * @param value   array of bytes to write
+   */
+  public void set(int index, byte[] value) {
+    assert index >= 0;
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    setBytes(index, value, 0, value.length);
+    lastSet = index;
+  }
+
+  /**
+   * Same as {@link #set(int, byte[])} except that it first calls
+   * {@code handleSafe} to deal with the case where index and length of a new
+   * element are beyond the existing capacity of the vector.
+   *
+   * @param index   position of the element to set
+   * @param value   array of bytes to write
+   */
+  @Override
+  public void setSafe(int index, byte[] value) {
+    assert index >= 0;
+    // check if the current index can be populated
+    handleSafe(index, value.length);
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    setBytes(index, value, 0, value.length);
+    lastSet = index;
+  }
+
+  /**
+   * Set the variable length element at the specified index to a range of the
+   * supplied byte array.
+   *
+   * <p>Positions between the previous {@code lastSet} and {@code index} are filled
+   * with empty values, the validity bit at {@code index} is set, the bytes are
+   * written, and {@code lastSet} becomes {@code index}. No capacity handling is
+   * performed here; {@link #setSafe(int, byte[], int, int)} is the variant that
+   * calls {@code handleSafe} first.
+   *
+   * @param index   position of the element to set
+   * @param value   array of bytes to write
+   * @param start   start index in an array of bytes
+   * @param length  length of data in an array of bytes
+   */
+  public void set(int index, byte[] value, int start, int length) {
+    assert index >= 0;
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    setBytes(index, value, start, length);
+    lastSet = index;
+  }
+
+  /**
+   * Same as {@link #set(int, byte[], int, int)} except that it handles the
+   * case where index and length of a new element are beyond the existing
+   * capacity of the vector.
+   *
+   * <p>{@code handleSafe(index, length)} is invoked before any state is written;
+   * the remaining steps match {@link #set(int, byte[], int, int)}.
+   *
+   * @param index   position of the element to set
+   * @param value   array of bytes to write
+   * @param start   start index in an array of bytes
+   * @param length  length of data in an array of bytes
+   */
+  public void setSafe(int index, byte[] value, int start, int length) {
+    assert index >= 0;
+    handleSafe(index, length);
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    setBytes(index, value, start, length);
+    lastSet = index;
+  }
+
+  /**
+   * Set the variable length element at the specified index to the
+   * content in supplied ByteBuffer.
+   *
+   * <p>The requested range is copied out of {@code value} through a duplicate,
+   * so heap, direct and read-only buffers are all accepted and the position and
+   * limit of {@code value} itself are left untouched. {@code start} is an
+   * absolute index into the buffer (not into its backing array).
+   *
+   * <p>No capacity handling is performed here; use
+   * {@link #setSafe(int, ByteBuffer, int, int)} when the vector may need to grow.
+   *
+   * @param index   position of the element to set
+   * @param value   ByteBuffer with data
+   * @param start   start index in ByteBuffer
+   * @param length  length of data in ByteBuffer
+   */
+  public void set(int index, ByteBuffer value, int start, int length) {
+    assert index >= 0;
+    // value.array() throws for direct/read-only buffers and ignores arrayOffset()
+    // for sliced ones, so read the bytes through a duplicate instead. The copy is
+    // done before any vector state is modified so a bad range leaves it unchanged.
+    final byte[] data = new byte[length];
+    final ByteBuffer source = value.duplicate();
+    source.clear(); // position = 0, limit = capacity; the content is not erased
+    source.position(start);
+    source.get(data, 0, length);
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    setBytes(index, data, 0, length);
+    lastSet = index;
+  }
+
+  /**
+   * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the
+   * case where index and length of a new element are beyond the existing
+   * capacity of the vector.
+   *
+   * <p>The requested range is copied out of {@code value} through a duplicate,
+   * so heap, direct and read-only buffers are all accepted and the position and
+   * limit of {@code value} itself are left untouched. {@code start} is an
+   * absolute index into the buffer (not into its backing array).
+   *
+   * @param index   position of the element to set
+   * @param value   ByteBuffer with data
+   * @param start   start index in ByteBuffer
+   * @param length  length of data in ByteBuffer
+   */
+  public void setSafe(int index, ByteBuffer value, int start, int length) {
+    assert index >= 0;
+    // value.array() throws for direct/read-only buffers and ignores arrayOffset()
+    // for sliced ones, so read the bytes through a duplicate instead. The copy is
+    // done before any vector state is modified so a bad range leaves it unchanged.
+    final byte[] data = new byte[length];
+    final ByteBuffer source = value.duplicate();
+    source.clear(); // position = 0, limit = capacity; the content is not erased
+    source.position(start);
+    source.get(data, 0, length);
+    handleSafe(index, length);
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    setBytes(index, data, 0, length);
+    lastSet = index;
+  }
+
+  /**
+   * Set the element at the given index to null.
+   *
+   * <p>While {@code index} is not below {@link #getValueCapacity()}, the validity
+   * buffer is reallocated; afterwards the validity bit at {@code index} is
+   * cleared. This method itself does not assign {@code lastSet}.
+   *
+   * @param index position of an element
+   */
+  @Override
+  public void setNull(int index) {
+    // We need to check and reallocate the validity buffer
+    while (index >= getValueCapacity()) {
+      reallocValidityBuffer();
+    }
+    BitVectorHelper.unsetBit(validityBuffer, index);
+  }
+
+  /**
+   * Store the given value at a particular position in the vector. isSet indicates
+   * whether the value is NULL or not.
+   *
+   * <p>The bytes in {@code [start, end)} of {@code buffer} are copied into a
+   * temporary array and written from there. No capacity handling is performed
+   * here; use {@link #setSafe(int, int, int, int, ArrowBuf)} when the vector may
+   * need to grow.
+   *
+   * @param index position of the new value
+   * @param isSet Zero for NULL value, 1 otherwise
+   * @param start start position of data in buffer
+   * @param end end position of data in buffer
+   * @param buffer data buffer containing the variable width element to be stored
+   *               in the vector
+   */
+  public void set(int index, int isSet, int start, int end, ArrowBuf buffer) {
+    assert index >= 0;
+    final int dataLength = end - start;
+    fillHoles(index);
+    BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+    byte[] data = new byte[dataLength];
+    buffer.getBytes(start, data, 0, dataLength);
+    // data holds the element at offset 0; `start` is an offset into `buffer` only
+    setBytes(index, data, 0, dataLength);
+    lastSet = index;
+  }
+
+  /**
+   * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case
+   * when index is greater than or equal to current value capacity of the
+   * vector.
+   *
+   * <p>{@code handleSafe(index, end - start)} is invoked before any state is
+   * written. The bytes in {@code [start, end)} of {@code buffer} are copied into a
+   * temporary array and written from offset 0 of that array.
+   *
+   * @param index position of the new value
+   * @param isSet Zero for NULL value, 1 otherwise
+   * @param start start position of data in buffer
+   * @param end end position of data in buffer
+   * @param buffer data buffer containing the variable width element to be stored
+   *               in the vector
+   */
+  public void setSafe(int index, int isSet, int start, int end, ArrowBuf 
buffer) {
+    assert index >= 0;
+    final int dataLength = end - start;
+    handleSafe(index, dataLength);
+    fillHoles(index);
+    BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+    byte[] data = new byte[dataLength];
+    buffer.getBytes(start, data, 0, dataLength);
+    setBytes(index, data, 0, dataLength);
+    lastSet = index;
+  }
+
+  /**
+   * Store the given value at a particular position in the vector and mark that
+   * position as non-null.
+   *
+   * <p>{@code length} bytes starting at {@code start} in {@code buffer} are copied
+   * into a temporary array and written from there. No capacity handling is
+   * performed here; use {@link #setSafe(int, int, int, ArrowBuf)} when the vector
+   * may need to grow.
+   *
+   * @param index position of the new value
+   * @param start start position of data in buffer
+   * @param length length of data in buffer
+   * @param buffer data buffer containing the variable width element to be stored
+   *               in the vector
+   */
+  public void set(int index, int start, int length, ArrowBuf buffer) {
+    assert index >= 0;
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    byte[] data = new byte[length];
+    buffer.getBytes(start, data, 0, length);
+    // data holds the element at offset 0; `start` is an offset into `buffer` only
+    setBytes(index, data, 0, length);
+    lastSet = index;
+  }
+
+  /**
+   * Same as {@link #set(int, int, int, ArrowBuf)} except that it handles the case
+   * when index is greater than or equal to current value capacity of the
+   * vector.
+   *
+   * <p>{@code handleSafe(index, length)} is invoked before any state is written.
+   * {@code length} bytes starting at {@code start} in {@code buffer} are copied
+   * into a temporary array and written from there.
+   *
+   * @param index position of the new value
+   * @param start start position of data in buffer
+   * @param length length of data in buffer
+   * @param buffer data buffer containing the variable width element to be stored
+   *               in the vector
+   */
+  public void setSafe(int index, int start, int length, ArrowBuf buffer) {
+    assert index >= 0;
+    handleSafe(index, length);
+    fillHoles(index);
+    BitVectorHelper.setBit(validityBuffer, index);
+    byte[] data = new byte[length];
+    buffer.getBytes(start, data, 0, length);
+    // data holds the element at offset 0; `start` is an offset into `buffer` only
+    setBytes(index, data, 0, length);
+    lastSet = index;
+  }
+
+
+  /*----------------------------------------------------------------*
+   |                                                                |
+   |                helper methods for setters                      |
+   |                                                                |
+   *----------------------------------------------------------------*/
+
+
+  protected final void fillHoles(int index) {
+    for (int i = lastSet + 1; i < index; i++) {
+      setBytes(i, EMPTY_BYTE_ARRAY, 0, EMPTY_BYTE_ARRAY.length);
+    }
+    lastSet = index - 1;
+  }
+
+  /**
+   * Read the length stored in the view of an element.
+   *
+   * @param index The index of the element in the vector.
+   * @return The length of the element at the given index, or 0 when the element
+   *         is not set.
+   * @throws IndexOutOfBoundsException if {@code index} is negative or not below
+   *         {@code viewBuffer.capacity() / ELEMENT_SIZE}
+   */
+  protected final int getLength(int index) {
+    final boolean withinViews = index >= 0 && index < viewBuffer.capacity() / ELEMENT_SIZE;
+    if (!withinViews) {
+      throw new IndexOutOfBoundsException("Index out of bounds: " + index);
+    }
+    final long viewStart = (long) index * ELEMENT_SIZE;
+    return isSet(index) == 0 ? 0 : viewBuffer.getInt(viewStart);
+  }
+
+  protected ArrowBuf allocateOrGetLastBuffer(BufferAllocator allocator, int 
length, List<ArrowBuf> dataBuffers) {
+    long dataBufferSize;
+    if (initialDataBufferSize > 0) {
+      dataBufferSize = initialDataBufferSize;
+    } else {
+      dataBufferSize = lastValueAllocationSizeInBytes;
+    }
+
+    if (dataBuffers.isEmpty() || dataBuffers.get(dataBuffers.size() - 
1).capacity() -
+            dataBuffers.get(dataBuffers.size() - 1).writerIndex() < length) {
+      ArrowBuf newBuf = allocator.buffer(dataBufferSize);
+      dataBuffers.add(newBuf);
+    }

Review Comment:
   > And, what if length is greater than dataBufferSize?
   
   Correct, we need to compare the buffer size with `length` and allocate an 
adequately sized buffer. But since the dataBuffer can receive further allocations, 
it would be better to allocate whichever is highest (`length`, 
`initialDataBufferSize`, or `lastValueAllocationSizeInBytes`).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to