vibhatha commented on code in PR #43888:
URL: https://github.com/apache/arrow/pull/43888#discussion_r1764990373


##########
java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java:
##########
@@ -0,0 +1,683 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A run-end encoded vector contains only two child vectors: a run_end vector 
of type int and a
+ * values vector of any type. There are no buffers associated with the parent 
vector.
+ */
+public class RunEndEncodedVector extends BaseValueVector implements 
FieldVector {
+  public static final FieldVector DEFAULT_VALUE_VECTOR = ZeroVector.INSTANCE;
+  public static final FieldVector DEFAULT_RUN_END_VECTOR = ZeroVector.INSTANCE;
+
+  public static RunEndEncodedVector empty(String name, BufferAllocator 
allocator) {
+    return new RunEndEncodedVector(
+        name, allocator, 
FieldType.notNullable(ArrowType.RunEndEncoded.INSTANCE), null);
+  }
+
+  protected final CallBack callBack;
+  protected Field field;
+  protected FieldVector runEndsVector;
+  protected FieldVector valuesVector;
+  protected int valueCount;
+
+  /**
+   * Constructs a new instance.
+   *
+   * @param name The name of the instance.
+   * @param allocator The allocator to use for allocating/reallocating buffers.
+   * @param fieldType The type of the array that is run-end encoded.
+   * @param callBack A schema change callback.
+   */
+  public RunEndEncodedVector(
+      String name, BufferAllocator allocator, FieldType fieldType, CallBack 
callBack) {
+    this(new Field(name, fieldType, null), allocator, callBack);
+  }
+
+  /**
+   * Constructs a new instance.
+   *
+   * @param field The field materialized by this vector.
+   * @param allocator The allocator to use for allocating/reallocating buffers.
+   * @param callBack A schema change callback.
+   */
+  public RunEndEncodedVector(Field field, BufferAllocator allocator, CallBack 
callBack) {
+    this(field, allocator, DEFAULT_RUN_END_VECTOR, DEFAULT_VALUE_VECTOR, 
callBack);
+  }
+
+  /**
+   * Constructs a new instance.
+   *
+   * @param field The field materialized by this vector.
+   * @param allocator The allocator to use for allocating/reallocating buffers.
+   * @param runEndsVector The vector represents run ends. Only Zero vector or 
type int vector with
+   *     size 16, 32 is allowed
+   * @param valuesVector The vector represents values
+   * @param callBack A schema change callback.
+   */
+  public RunEndEncodedVector(
+      Field field,
+      BufferAllocator allocator,
+      FieldVector runEndsVector,
+      FieldVector valuesVector,
+      CallBack callBack) {
+    super(allocator);
+    this.field = field;
+    this.callBack = callBack;
+    this.valueCount = 0;
+    this.runEndsVector = runEndsVector;
+    this.valuesVector = valuesVector;
+  }
+
+  /** ValueVector interface */
+
+  /**
+   * Allocate new buffers. ValueVector implements logic to determine how much 
to allocate.
+   *
+   * @throws OutOfMemoryException Thrown if no memory can be allocated.
+   */
+  @Override
+  public void allocateNew() throws OutOfMemoryException {
+    if (!allocateNewSafe()) {
+      throw new OutOfMemoryException("Failure while allocating memory");
+    }
+  }
+
+  /**
+   * Allocates new buffers. ValueVector implements logic to determine how much 
to allocate.
+   *
+   * @return Returns true if allocation was successful.
+   */
+  @Override
+  public boolean allocateNewSafe() {
+    initializeChildrenFromFields(field.getChildren());
+    for (FieldVector v : getChildrenFromFields()) {
+      boolean isAllocated = v.allocateNewSafe();
+      if (!isAllocated) {
+        v.clear();
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Allocate new buffer with double capacity, and copy data into the new 
buffer. Replace vector's
+   * buffer with new buffer, and release old one
+   */
+  @Override
+  public void reAlloc() {
+    for (FieldVector v : getChildrenFromFields()) {
+      v.reAlloc();
+    }
+  }
+
+  @Override
+  public BufferAllocator getAllocator() {
+    return allocator;
+  }
+
+  @Override
+  protected FieldReader getReaderImpl() {
+    return null;
+  }
+
+  /**
+   * Set the initial record capacity.
+   *
+   * @param numRecords the initial record capacity.
+   */
+  @Override
+  public void setInitialCapacity(int numRecords) {}
+
+  /**
+   * Returns the maximum number of values that can be stored in this vector 
instance.
+   *
+   * @return the maximum number of values that can be stored in this vector 
instance.
+   */
+  @Override
+  public int getValueCapacity() {
+    return getChildrenFromFields().stream()
+        .mapToInt(item -> item != null ? item.getValueCapacity() : 0)
+        .min()
+        .orElseThrow(NoSuchElementException::new);
+  }
+
+  /** Alternative to clear(). Allows use as an AutoCloseable in 
try-with-resources. */
+  @Override
+  public void close() {
+    for (FieldVector v : getChildrenFromFields()) {
+      v.close();
+    }
+  }
+
+  /**
+   * Release any owned ArrowBuf and reset the ValueVector to the initial 
state. If the vector has
+   * any child vectors, they will also be cleared.
+   */
+  @Override
+  public void clear() {
+    for (FieldVector v : getChildrenFromFields()) {
+      v.clear();
+    }
+  }
+
+  /**
+   * Reset the ValueVector to the initial state without releasing any owned 
ArrowBuf. Buffer
+   * capacities will remain unchanged and any previous data will be zeroed 
out. This includes
+   * buffers for data, validity, offset, etc. If the vector has any child 
vectors, they will also be
+   * reset.
+   */
+  @Override
+  public void reset() {
+    for (FieldVector v : getChildrenFromFields()) {
+      v.reset();
+    }
+    valueCount = 0;
+  }
+
+  /**
+   * Get information about how this field is materialized.
+   *
+   * @return the field corresponding to this vector
+   */
+  @Override
+  public Field getField() {
+    return field;
+  }
+
+  @Override
+  public MinorType getMinorType() {
+    return MinorType.RUNENDENCODED;
+  }
+
+  /**
+   * To transfer quota responsibility.
+   *
+   * @param allocator the target allocator
+   * @return a {@link org.apache.arrow.vector.util.TransferPair transfer 
pair}, creating a new
+   *     target vector of the same type.
+   */
+  @Override
+  public TransferPair getTransferPair(BufferAllocator allocator) {
+    throw new UnsupportedOperationException(
+        "RunEndEncodedVector does not support 
getTransferPair(BufferAllocator)");
+  }
+
+  /**
+   * To transfer quota responsibility.
+   *
+   * @param ref the name of the vector
+   * @param allocator the target allocator
+   * @return a {@link org.apache.arrow.vector.util.TransferPair transfer 
pair}, creating a new
+   *     target vector of the same type.
+   */
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+    return getTransferPair(ref, allocator, null);
+  }
+
+  /**
+   * To transfer quota responsibility.
+   *
+   * @param field the Field object used by the target vector
+   * @param allocator the target allocator
+   * @return a {@link org.apache.arrow.vector.util.TransferPair transfer 
pair}, creating a new
+   *     target vector of the same type.
+   */
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
+    return getTransferPair(field, allocator, null);
+  }
+
+  /**
+   * To transfer quota responsibility.
+   *
+   * @param ref the name of the vector
+   * @param allocator the target allocator
+   * @param callBack A schema change callback.
+   * @return a {@link org.apache.arrow.vector.util.TransferPair transfer 
pair}, creating a new
+   *     target vector of the same type.
+   */
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator, 
CallBack callBack) {
+    throw new UnsupportedOperationException(
+        "RunEndEncodedVector does not support getTransferPair(String, 
BufferAllocator, CallBack)");
+  }
+
+  /**
+   * To transfer quota responsibility.
+   *
+   * @param field the Field object used by the target vector
+   * @param allocator the target allocator
+   * @param callBack A schema change callback.
+   * @return a {@link org.apache.arrow.vector.util.TransferPair transfer 
pair}, creating a new
+   *     target vector of the same type.
+   */
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator, 
CallBack callBack) {
+    throw new UnsupportedOperationException(
+        "RunEndEncodedVector does not support getTransferPair(Field, 
BufferAllocator, CallBack)");
+  }
+
+  /**
+   * Makes a new transfer pair used to transfer underlying buffers.
+   *
+   * @param target the target for the transfer
+   * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer 
pair} that is used to
+   *     transfer underlying buffers into the target vector.
+   */
+  @Override
+  public TransferPair makeTransferPair(ValueVector target) {
+    throw new UnsupportedOperationException(
+        "RunEndEncodedVector does not support makeTransferPair(ValueVector)");
+  }
+
+  /**
+   * Get a reader for this vector.
+   *
+   * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field 
reader} that supports
+   *     reading values from this vector.
+   */
+  @Override
+  public FieldReader getReader() {
+    throw new UnsupportedOperationException("Not yet implemented.");
+  }
+
+  /**
+   * Get a writer for this vector.
+   *
+   * @return a {@link org.apache.arrow.vector.complex.writer.FieldWriter field 
writer} that supports
+   *     writing values to this vector.
+   */
+  public FieldWriter getWriter() {
+    throw new UnsupportedOperationException("Not yet implemented.");
+  }
+
+  /**
+   * Get the number of bytes used by this vector.
+   *
+   * @return the number of bytes that is used by this vector instance.
+   */
+  @Override
+  public int getBufferSize() {
+    int bufferSize = 0;
+    for (FieldVector v : getChildrenFromFields()) {
+      bufferSize += v.getBufferSize();
+    }
+    return bufferSize;
+  }
+
+  /**
+   * Returns the number of bytes that is used by this vector if it holds the 
given number of values.
+   * The result will be the same as if setValueCount() were called, followed 
by calling
+   * getBufferSize(), but without any of the closing side-effects that 
setValueCount() implies wrt
+   * finishing off the population of a vector. Some operations might wish to 
use this to determine
+   * how much memory has been used by a vector so far, even though it is not 
finished being
+   * populated.
+   *
+   * @param valueCount the number of values to assume this vector contains
+   * @return the buffer size if this vector is holding valueCount values
+   */
+  @Override
+  public int getBufferSizeFor(int valueCount) {
+    return 0;
+  }
+
+  /**
+   * Return the underlying buffers associated with this vector. Note that this 
doesn't impact the
+   * reference counts for this buffer so it only should be used for in-context 
access. Also note
+   * that this buffer changes regularly thus external classes shouldn't hold a 
reference to it
+   * (unless they change it).
+   *
+   * @param clear Whether to clear vector before returning; the buffers will 
still be refcounted;
+   *     but the returned array will be the only reference to them
+   * @return The underlying {@link ArrowBuf buffers} that is used by this 
vector instance.
+   */
+  @Override
+  public ArrowBuf[] getBuffers(boolean clear) {
+    return null;
+  }
+
+  /**
+   * Gets the underlying buffer associated with validity vector.
+   *
+   * @return buffer
+   */
+  @Override
+  public ArrowBuf getValidityBuffer() {
+    return null;

Review Comment:
   @lidavidm should this return `null` or throw an "unsupported" exception? I 
know that, by definition, there is no validity buffer here. At the same time, 
this method is not really part of this vector's API; we only have to override 
it because of the existing interfaces we extend. Maybe those interfaces should 
provide a default implementation. I know that is somewhat out of scope here, 
but WDYT? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to