This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6f8f34bd34 GH-38254: [Java] Add reusable buffer getters to char/binary vectors (#38266)
6f8f34bd34 is described below

commit 6f8f34bd344c5cdf158aacb0215c3409d2996c5e
Author: James Duong <[email protected]>
AuthorDate: Mon Oct 23 09:45:26 2023 -0700

    GH-38254: [Java] Add reusable buffer getters to char/binary vectors (#38266)
    
    ### Rationale for this change
    Provide a way for a user to reuse a buffer when iterating over byte-array-based ValueVectors to avoid excessive
    reallocations.
    
    ### What changes are included in this PR?
    Add a reusable buffer interface that can be populated by character and binary vectors to avoid allocations when consuming vector content.
    
    Optimize getObject() on VarCharVector/LargeVarCharVector to avoid an extra allocation of a byte array (copy from ArrowBuf directly to the resulting Text).
    
    ### Are these changes tested?
    
    ### Are there any user-facing changes?
    
    Yes.
    
    **This PR includes breaking changes to public APIs.**
    
    * Closes: #38254
    
    Authored-by: James Duong <[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 .../text/ArrowFlightJdbcVarCharVectorAccessor.java |   2 +-
 .../org/apache/arrow/memory/ReusableBuffer.java    |  47 ++++++++
 .../arrow/vector/BaseLargeVariableWidthVector.java |  33 +++---
 .../arrow/vector/BaseVariableWidthVector.java      |  27 +++--
 .../apache/arrow/vector/FixedSizeBinaryVector.java |  13 +++
 .../apache/arrow/vector/LargeVarBinaryVector.java  |  23 +++-
 .../apache/arrow/vector/LargeVarCharVector.java    |  39 +++++--
 .../org/apache/arrow/vector/VarBinaryVector.java   |  19 ++-
 .../org/apache/arrow/vector/VarCharVector.java     |  33 ++++--
 .../arrow/vector/util/ReusableByteArray.java       | 129 +++++++++++++++++++++
 .../java/org/apache/arrow/vector/util/Text.java    |  78 +------------
 .../arrow/vector/TestFixedSizeBinaryVector.java    |  16 +++
 .../arrow/vector/TestLargeVarBinaryVector.java     |  74 ++++++++----
 .../arrow/vector/TestLargeVarCharVector.java       |  21 ++++
 .../org/apache/arrow/vector/TestValueVector.java   |  44 +++++++
 .../arrow/vector/util/TestReusableByteArray.java   |  97 ++++++++++++++++
 16 files changed, 540 insertions(+), 155 deletions(-)

diff --git 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
index aad8d9094c..d4075bbb75 100644
--- 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
+++ 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
@@ -200,7 +200,7 @@ public class ArrowFlightJdbcVarCharVectorAccessor extends 
ArrowFlightJdbcAccesso
 
     // Already in UTF-8
     final Text textValue = new Text(value);
-    return new ByteArrayInputStream(textValue.getBytes(), 0, 
textValue.getLength());
+    return new ByteArrayInputStream(textValue.getBytes(), 0, (int) 
textValue.getLength());
   }
 
   @Override
diff --git 
a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java
 
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java
new file mode 100644
index 0000000000..3530b819aa
--- /dev/null
+++ 
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * A lightweight, automatically expanding container for holding byte data.
+ * @param <T> The type of the underlying buffer.
+ */
+public interface ReusableBuffer<T> {
+  /**
+   * Get the number of valid bytes in the data.
+   *
+   * @return the number of valid bytes in the data
+   */
+  long getLength();
+
+  /**
+   * Get the buffer backing this ReusableBuffer.
+   */
+  T getBuffer();
+
+  /**
+   * Set the buffer to the contents of the given ArrowBuf.
+   * The internal buffer must resize if it cannot fit the contents
+   * of the data.
+   *
+   * @param srcBytes  the data to copy from
+   * @param start     the first position of the new data
+   * @param len       the number of bytes of the new data
+   */
+  void set(ArrowBuf srcBytes, long start, long len);
+}
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
index db922d6a70..fcac28bd08 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
@@ -180,8 +180,8 @@ public abstract class BaseLargeVariableWidthVector extends 
BaseValueVector
     if (valueCount == 0) {
       return 0.0D;
     }
-    final long startOffset = offsetBuffer.getLong(0);
-    final long endOffset = offsetBuffer.getLong((long) valueCount * 
OFFSET_WIDTH);
+    final long startOffset = getStartOffset(0);
+    final long endOffset = getStartOffset(valueCount);
     final double totalListSize = endOffset - startOffset;
     return totalListSize / valueCount;
   }
@@ -570,7 +570,7 @@ public abstract class BaseLargeVariableWidthVector extends 
BaseValueVector
     if (valueCount == 0) {
       return 0;
     }
-    return capAtMaxInt(offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH));
+    return capAtMaxInt(getStartOffset(valueCount));
   }
 
   /**
@@ -598,7 +598,7 @@ public abstract class BaseLargeVariableWidthVector extends 
BaseValueVector
     final long validityBufferSize = getValidityBufferSizeFromCount(valueCount);
     final long offsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
     /* get the end offset for this valueCount */
-    final long dataBufferSize = offsetBuffer.getLong((long) valueCount * 
OFFSET_WIDTH);
+    final long dataBufferSize = getStartOffset(valueCount);
     return capAtMaxInt(validityBufferSize + offsetBufferSize + dataBufferSize);
   }
 
@@ -702,7 +702,7 @@ public abstract class BaseLargeVariableWidthVector extends 
BaseValueVector
   public abstract TransferPair getTransferPair(Field field, BufferAllocator 
allocator);
 
   /**
-   * Transfer this vector'data to another vector. The memory associated
+   * Transfer this vector's data to another vector. The memory associated
    * with this vector is transferred to the allocator of target vector
    * for accounting and management purposes.
    * @param target destination vector for transfer
@@ -752,12 +752,12 @@ public abstract class BaseLargeVariableWidthVector 
extends BaseValueVector
    * in the target vector.
    */
   private void splitAndTransferOffsetBuffer(int startIndex, int length, 
BaseLargeVariableWidthVector target) {
-    final long start = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH);
-    final long end = offsetBuffer.getLong((long) (startIndex + length) * 
OFFSET_WIDTH);
+    final long start = getStartOffset(startIndex);
+    final long end = getStartOffset(startIndex + length);
     final long dataLength = end - start;
     target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
     for (int i = 0; i < length + 1; i++) {
-      final long relativeSourceOffset = offsetBuffer.getLong((long) 
(startIndex + i) * OFFSET_WIDTH) - start;
+      final long relativeSourceOffset = getStartOffset(startIndex + i) - start;
       target.offsetBuffer.setLong((long) i * OFFSET_WIDTH, 
relativeSourceOffset);
     }
     final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength);
@@ -973,8 +973,7 @@ public abstract class BaseLargeVariableWidthVector extends 
BaseValueVector
       return 0;
     }
     final long startOffset = getStartOffset(index);
-    final int dataLength =
-        (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - 
startOffset);
+    final int dataLength = (int) (getEndOffset(index) - startOffset);
     return dataLength;
   }
 
@@ -1320,7 +1319,7 @@ public abstract class BaseLargeVariableWidthVector 
extends BaseValueVector
       final long length = end - start;
       fillHoles(thisIndex);
       BitVectorHelper.setBit(this.validityBuffer, thisIndex);
-      final long copyStart = offsetBuffer.getLong((long) thisIndex * 
OFFSET_WIDTH);
+      final long copyStart = getStartOffset(thisIndex);
       from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, (int) 
length);
       offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + 
length);
     }
@@ -1352,7 +1351,7 @@ public abstract class BaseLargeVariableWidthVector 
extends BaseValueVector
       handleSafe(thisIndex, length);
       fillHoles(thisIndex);
       BitVectorHelper.setBit(this.validityBuffer, thisIndex);
-      final long copyStart = offsetBuffer.getLong((long) thisIndex * 
OFFSET_WIDTH);
+      final long copyStart = getStartOffset(thisIndex);
       from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, 
length);
       offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + 
length);
     }
@@ -1369,8 +1368,8 @@ public abstract class BaseLargeVariableWidthVector 
extends BaseValueVector
     if (isNull(index)) {
       reuse.set(null, 0, 0);
     } else {
-      long offset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
-      int length = (int) (offsetBuffer.getLong((long) (index + 1) * 
OFFSET_WIDTH) - offset);
+      long offset = getStartOffset(index);
+      int length = (int) (getEndOffset(index) - offset);
       reuse.set(valueBuffer, offset, length);
     }
     return reuse;
@@ -1387,7 +1386,7 @@ public abstract class BaseLargeVariableWidthVector 
extends BaseValueVector
       return ArrowBufPointer.NULL_HASH_CODE;
     }
     final long start = getStartOffset(index);
-    final long end = getStartOffset(index + 1);
+    final long end = getEndOffset(index);
     return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
   }
 
@@ -1395,4 +1394,8 @@ public abstract class BaseLargeVariableWidthVector 
extends BaseValueVector
   public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
     return visitor.visit(this, value);
   }
+
+  protected final long getEndOffset(int index) {
+    return offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+  }
 }
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index b57dd93438..a0a5e085a5 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -199,8 +199,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     if (valueCount == 0) {
       return 0.0D;
     }
-    final int startOffset = offsetBuffer.getInt(0);
-    final int endOffset = offsetBuffer.getInt((long) valueCount * 
OFFSET_WIDTH);
+    final int startOffset = getStartOffset(0);
+    final int endOffset = getStartOffset(valueCount);
     final double totalListSize = endOffset - startOffset;
     return totalListSize / valueCount;
   }
@@ -791,8 +791,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
    * in the target vector.
    */
   private void splitAndTransferOffsetBuffer(int startIndex, int length, 
BaseVariableWidthVector target) {
-    final int start = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH);
-    final int end = offsetBuffer.getInt((long) (startIndex + length) * 
OFFSET_WIDTH);
+    final int start = getStartOffset(startIndex);
+    final int end = getStartOffset(startIndex + length);
     final int dataLength = end - start;
 
     if (start == 0) {
@@ -801,7 +801,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     } else {
       target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
       for (int i = 0; i < length + 1; i++) {
-        final int relativeSourceOffset = offsetBuffer.getInt((long) 
(startIndex + i) * OFFSET_WIDTH) - start;
+        final int relativeSourceOffset = getStartOffset(startIndex + i) - 
start;
         target.offsetBuffer.setInt((long) i * OFFSET_WIDTH, 
relativeSourceOffset);
       }
     }
@@ -1032,8 +1032,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
       return 0;
     }
     final int startOffset = getStartOffset(index);
-    final int dataLength =
-            offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) - startOffset;
+    final int dataLength = getEndOffset(index) - startOffset;
     return dataLength;
   }
 
@@ -1238,7 +1237,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     handleSafe(index, length);
     fillHoles(index);
     BitVectorHelper.setBit(validityBuffer, index);
-    final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+    final int startOffset = getStartOffset(index);
     offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + 
length);
     final ArrowBuf bb = buffer.slice(start, length);
     valueBuffer.setBytes(startOffset, bb);
@@ -1375,7 +1374,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
       final int length = end - start;
       fillHoles(thisIndex);
       BitVectorHelper.setBit(this.validityBuffer, thisIndex);
-      final int copyStart = offsetBuffer.getInt((long) thisIndex * 
OFFSET_WIDTH);
+      final int copyStart = getStartOffset(thisIndex);
       from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, 
length);
       offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + 
length);
     }
@@ -1398,7 +1397,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
       handleSafe(thisIndex, 0);
       fillHoles(thisIndex);
       BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
-      final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH);
+      final int copyStart = getStartOffset(thisIndex);
       offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
     } else {
       final int start = from.getOffsetBuffer().getInt((long) fromIndex * 
OFFSET_WIDTH);
@@ -1407,7 +1406,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
       handleSafe(thisIndex, length);
       fillHoles(thisIndex);
       BitVectorHelper.setBit(this.validityBuffer, thisIndex);
-      final int copyStart = offsetBuffer.getInt((long) thisIndex * 
OFFSET_WIDTH);
+      final int copyStart = getStartOffset(thisIndex);
       from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, 
length);
       offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + 
length);
     }
@@ -1424,8 +1423,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     if (isNull(index)) {
       reuse.set(null, 0, 0);
     } else {
-      int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
-      int length = offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - 
offset;
+      int offset = getStartOffset(index);
+      int length = getEndOffset(index) - offset;
       reuse.set(valueBuffer, offset, length);
     }
     return reuse;
@@ -1442,7 +1441,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
       return ArrowBufPointer.NULL_HASH_CODE;
     }
     final int start = getStartOffset(index);
-    final int end = getStartOffset(index + 1);
+    final int end = getEndOffset(index);
     return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
   }
 
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
index 967d560d78..52c57e2234 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
@@ -21,6 +21,7 @@ import static 
org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
 import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.complex.impl.FixedSizeBinaryReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
@@ -116,6 +117,18 @@ public class FixedSizeBinaryVector extends 
BaseFixedWidthVector {
     return dst;
   }
 
+  /**
+   * Read the value at the given position to the given output buffer.
+   * The caller is responsible for checking for nullity first.
+   *
+   * @param index position of element.
+   * @param buffer the buffer to write into.
+   */
+  public void read(int index, ReusableBuffer<?> buffer) {
+    final int startOffset = index * byteWidth;
+    buffer.set(valueBuffer, startOffset, byteWidth);
+  }
+
   /**
    * Get the element at the given index from the vector and
    * sets the state in holder. If element at given index
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
index 6806b958da..0750f68f4f 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
@@ -18,6 +18,7 @@
 package org.apache.arrow.vector;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
 import org.apache.arrow.vector.complex.impl.LargeVarBinaryReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.LargeVarBinaryHolder;
@@ -105,13 +106,25 @@ public final class LargeVarBinaryVector extends 
BaseLargeVariableWidthVector {
       return null;
     }
     final long startOffset = getStartOffset(index);
-    final int dataLength =
-        (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - 
startOffset);
-    final byte[] result = new byte[dataLength];
-    valueBuffer.getBytes(startOffset, result, 0, dataLength);
+    final long dataLength = getEndOffset(index) - startOffset;
+    final byte[] result = new byte[(int) dataLength];
+    valueBuffer.getBytes(startOffset, result, 0, (int) dataLength);
     return result;
   }
 
+  /**
+   * Read the value at the given position to the given output buffer.
+   * The caller is responsible for checking for nullity first.
+   *
+   * @param index position of element.
+   * @param buffer the buffer to write into.
+   */
+  public void read(int index, ReusableBuffer<?> buffer) {
+    final long startOffset = getStartOffset(index);
+    final long dataLength = getEndOffset(index) - startOffset;
+    buffer.set(valueBuffer, startOffset, dataLength);
+  }
+
   /**
    * Get the variable length element at specified index as Text.
    *
@@ -137,7 +150,7 @@ public final class LargeVarBinaryVector extends 
BaseLargeVariableWidthVector {
     }
     holder.isSet = 1;
     holder.start = getStartOffset(index);
-    holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+    holder.end = getEndOffset(index);
     holder.buffer = valueBuffer;
   }
 
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
index 874079a0ef..6f08fcb81f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
@@ -17,7 +17,10 @@
 
 package org.apache.arrow.vector;
 
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
 import org.apache.arrow.vector.complex.impl.LargeVarCharReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.LargeVarCharHolder;
@@ -106,10 +109,9 @@ public final class LargeVarCharVector extends 
BaseLargeVariableWidthVector {
       return null;
     }
     final long startOffset = getStartOffset(index);
-    final int dataLength =
-        (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - 
startOffset);
-    final byte[] result = new byte[dataLength];
-    valueBuffer.getBytes(startOffset, result, 0, dataLength);
+    final long dataLength = getEndOffset(index) - startOffset;
+    final byte[] result = new byte[(int) dataLength];
+    valueBuffer.getBytes(startOffset, result, 0, (int) dataLength);
     return result;
   }
 
@@ -120,12 +122,27 @@ public final class LargeVarCharVector extends 
BaseLargeVariableWidthVector {
    * @return Text object for non-null element, null otherwise
    */
   public Text getObject(int index) {
-    byte[] b = get(index);
-    if (b == null) {
+    assert index >= 0;
+    if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
       return null;
-    } else {
-      return new Text(b);
     }
+
+    final Text result = new Text();
+    read(index, result);
+    return result;
+  }
+
+  /**
+   * Read the value at the given position to the given output buffer.
+   * The caller is responsible for checking for nullity first.
+   *
+   * @param index position of element.
+   * @param buffer the buffer to write into.
+   */
+  public void read(int index, ReusableBuffer<?> buffer) {
+    final long startOffset = getStartOffset(index);
+    final long dataLength = getEndOffset(index) - startOffset;
+    buffer.set(valueBuffer, startOffset, dataLength);
   }
 
   /**
@@ -143,7 +160,7 @@ public final class LargeVarCharVector extends 
BaseLargeVariableWidthVector {
     }
     holder.isSet = 1;
     holder.start = getStartOffset(index);
-    holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+    holder.end = getEndOffset(index);
     holder.buffer = valueBuffer;
   }
 
@@ -247,7 +264,7 @@ public final class LargeVarCharVector extends 
BaseLargeVariableWidthVector {
    * @param text    Text object with data
    */
   public void set(int index, Text text) {
-    set(index, text.getBytes(), 0, text.getLength());
+    set(index, text.getBytes(), 0, (int) text.getLength());
   }
 
   /**
@@ -259,7 +276,7 @@ public final class LargeVarCharVector extends 
BaseLargeVariableWidthVector {
    * @param text    Text object with data
    */
   public void setSafe(int index, Text text) {
-    setSafe(index, text.getBytes(), 0, text.getLength());
+    setSafe(index, text.getBytes(), 0, (int) text.getLength());
   }
 
   @Override
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
index b43cd33d05..87790c1168 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
@@ -20,6 +20,7 @@ package org.apache.arrow.vector;
 import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
 import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
@@ -106,13 +107,25 @@ public final class VarBinaryVector extends 
BaseVariableWidthVector {
       return null;
     }
     final int startOffset = getStartOffset(index);
-    final int dataLength =
-            offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - 
startOffset;
+    final int dataLength = getEndOffset(index) - startOffset;
     final byte[] result = new byte[dataLength];
     valueBuffer.getBytes(startOffset, result, 0, dataLength);
     return result;
   }
 
+  /**
+   * Read the value at the given position to the given output buffer.
+   * The caller is responsible for checking for nullity first.
+   *
+   * @param index position of element.
+   * @param buffer the buffer to write into.
+   */
+  public void read(int index, ReusableBuffer<?> buffer) {
+    final int startOffset = getStartOffset(index);
+    final int dataLength = getEndOffset(index) - startOffset;
+    buffer.set(valueBuffer, startOffset, dataLength);
+  }
+
   /**
    * Get the variable length element at specified index as Text.
    *
@@ -138,7 +151,7 @@ public final class VarBinaryVector extends 
BaseVariableWidthVector {
     }
     holder.isSet = 1;
     holder.start = getStartOffset(index);
-    holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+    holder.end = getEndOffset(index);
     holder.buffer = valueBuffer;
   }
 
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
index 9ac275f75a..7350dc99bb 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
@@ -20,6 +20,7 @@ package org.apache.arrow.vector;
 import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
 
 import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
 import org.apache.arrow.vector.complex.impl.VarCharReaderImpl;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.holders.NullableVarCharHolder;
@@ -105,8 +106,7 @@ public final class VarCharVector extends 
BaseVariableWidthVector {
       return null;
     }
     final int startOffset = getStartOffset(index);
-    final int dataLength =
-            offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - 
startOffset;
+    final int dataLength = getEndOffset(index) - startOffset;
     final byte[] result = new byte[dataLength];
     valueBuffer.getBytes(startOffset, result, 0, dataLength);
     return result;
@@ -119,12 +119,27 @@ public final class VarCharVector extends 
BaseVariableWidthVector {
    * @return Text object for non-null element, null otherwise
    */
   public Text getObject(int index) {
-    byte[] b = get(index);
-    if (b == null) {
+    assert index >= 0;
+    if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
       return null;
-    } else {
-      return new Text(b);
     }
+
+    final Text result = new Text();
+    read(index, result);
+    return result;
+  }
+
+  /**
+   * Read the value at the given position to the given output buffer.
+   * The caller is responsible for checking for nullity first.
+   *
+   * @param index position of element.
+   * @param buffer the buffer to write into.
+   */
+  public void read(int index, ReusableBuffer<?> buffer) {
+    final int startOffset = getStartOffset(index);
+    final int dataLength = getEndOffset(index) - startOffset;
+    buffer.set(valueBuffer, startOffset, dataLength);
   }
 
   /**
@@ -142,7 +157,7 @@ public final class VarCharVector extends 
BaseVariableWidthVector {
     }
     holder.isSet = 1;
     holder.start = getStartOffset(index);
-    holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+    holder.end = getEndOffset(index);
     holder.buffer = valueBuffer;
   }
 
@@ -247,7 +262,7 @@ public final class VarCharVector extends 
BaseVariableWidthVector {
    * @param text    Text object with data
    */
   public void set(int index, Text text) {
-    set(index, text.getBytes(), 0, text.getLength());
+    set(index, text.getBytes(), 0, (int) text.getLength());
   }
 
   /**
@@ -259,7 +274,7 @@ public final class VarCharVector extends 
BaseVariableWidthVector {
    * @param text    Text object with data
    */
   public void setSafe(int index, Text text) {
-    setSafe(index, text.getBytes(), 0, text.getLength());
+    setSafe(index, text.getBytes(), 0, (int) text.getLength());
   }
 
   @Override
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java 
b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
new file mode 100644
index 0000000000..d938cd833a
--- /dev/null
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Arrays;
+import java.util.Base64;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.ReusableBuffer;
+
+/**
+ * A wrapper around byte arrays for repeated writing.
+ */
+public class ReusableByteArray implements ReusableBuffer<byte[]> {
+
+  protected static final byte[] EMPTY_BYTES = new byte[0];
+
+  protected byte[] bytes;
+  protected int length;
+
+  public ReusableByteArray() {
+    bytes = EMPTY_BYTES;
+  }
+
+  public ReusableByteArray(byte[] data) {
+    bytes = Arrays.copyOfRange(data, 0, data.length);
+    length = data.length;
+  }
+
+  /**
+   * Get the number of bytes in the byte array.
+   *
+   * @return the number of bytes in the byte array
+   */
+  @Override
+  public long getLength() {
+    return length;
+  }
+
+  @Override
+  public byte[] getBuffer() {
+    return bytes;
+  }
+
+  @Override
+  public void set(ArrowBuf srcBytes, long start, long len) {
+    setCapacity((int) len, false);
+    srcBytes.getBytes(start, bytes, 0, (int) len);
+    length = (int) len;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    } else if (o == null) {
+      return false;
+    }
+    if (!(o instanceof ReusableByteArray)) {
+      return false;
+    }
+
+    final ReusableByteArray that = (ReusableByteArray) o;
+    if (this.getLength() != that.getLength()) {
+      return false;
+    }
+
+    for (int i = 0; i < length; i++) {
+      if (bytes[i] != that.bytes[i]) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    if (bytes == null) {
+      return 0;
+    }
+
+    int result = 1;
+    for (int i = 0; i < length; i++) {
+      result = 31 * result + bytes[i];
+    }
+
+    return result;
+  }
+
+  @Override
+  public String toString() {
+    return Base64.getEncoder().encodeToString(Arrays.copyOfRange(bytes, 0, 
length));
+  }
+
+  /**
+   * Sets the capacity of this object to <em>at least</em> <code>len</code> 
bytes. If the
+   * current buffer is longer, then the capacity and existing content of the 
buffer are unchanged.
+   * If <code>len</code> is larger than the current capacity, the Text 
object's capacity is
+   * increased to match.
+   *
+   * @param len      the number of bytes we need
+   * @param keepData should the old data be kept
+   */
+  protected void setCapacity(int len, boolean keepData) {
+    if (bytes == null || bytes.length < len) {
+      if (bytes != null && keepData) {
+        bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
+      } else {
+        bytes = new byte[len];
+      }
+    }
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java 
b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
index 778af0ca95..5f5f5d3bd6 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
@@ -29,7 +29,6 @@ import java.nio.charset.CodingErrorAction;
 import java.nio.charset.MalformedInputException;
 import java.text.CharacterIterator;
 import java.text.StringCharacterIterator;
-import java.util.Arrays;
 import java.util.Optional;
 
 import com.fasterxml.jackson.core.JsonGenerationException;
@@ -43,7 +42,7 @@ import com.fasterxml.jackson.databind.ser.std.StdSerializer;
  * Lifted from Hadoop 2.7.1
  */
 @JsonSerialize(using = Text.TextSerializer.class)
-public class Text {
+public class Text extends ReusableByteArray {
 
   private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
       new ThreadLocal<CharsetEncoder>() {
@@ -65,13 +64,9 @@ public class Text {
         }
       };
 
-  private static final byte[] EMPTY_BYTES = new byte[0];
-
-  private byte[] bytes;
-  private int length;
 
   public Text() {
-    bytes = EMPTY_BYTES;
+    super();
   }
 
   /**
@@ -123,15 +118,6 @@ public class Text {
     return bytes;
   }
 
-  /**
-   * Get the number of bytes in the byte array.
-   *
-   * @return the number of bytes in the byte array
-   */
-  public int getLength() {
-    return length;
-  }
-
   /**
    * Returns the Unicode Scalar Value (32-bit integer value) for the character 
at
    * <code>position</code>. Note that this method avoids using the converter 
or doing String
@@ -238,7 +224,7 @@ public class Text {
    * @param other the text to initialize from
    */
   public void set(Text other) {
-    set(other.getBytes(), 0, other.getLength());
+    set(other.getBytes(), 0, (int) other.getLength());
   }
 
   /**
@@ -278,25 +264,6 @@ public class Text {
     length = 0;
   }
 
-  /**
-   * Sets the capacity of this Text object to <em>at least</em> 
<code>len</code> bytes. If the
-   * current buffer is longer, then the capacity and existing content of the 
buffer are unchanged.
-   * If <code>len</code> is larger than the current capacity, the Text 
object's capacity is
-   * increased to match.
-   *
-   * @param len      the number of bytes we need
-   * @param keepData should the old data be kept
-   */
-  private void setCapacity(int len, boolean keepData) {
-    if (bytes == null || bytes.length < len) {
-      if (bytes != null && keepData) {
-        bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
-      } else {
-        bytes = new byte[len];
-      }
-    }
-  }
-
   @Override
   public String toString() {
     try {
@@ -322,47 +289,10 @@ public class Text {
 
   @Override
   public boolean equals(Object o) {
-    if (o == this) {
-      return true;
-    } else if (o == null) {
-      return false;
-    }
     if (!(o instanceof Text)) {
       return false;
     }
-
-    final Text that = (Text) o;
-    if (this.getLength() != that.getLength()) {
-      return false;
-    }
-
-    // copied from Arrays.equals so we don'thave to copy the byte arrays
-    for (int i = 0; i < length; i++) {
-      if (bytes[i] != that.bytes[i]) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  /**
-   * Copied from Arrays.hashCode so we don't have to copy the byte array.
-   *
-   * @return hashCode
-   */
-  @Override
-  public int hashCode() {
-    if (bytes == null) {
-      return 0;
-    }
-
-    int result = 1;
-    for (int i = 0; i < length; i++) {
-      result = 31 * result + bytes[i];
-    }
-
-    return result;
+    return super.equals(o);
   }
 
   // / STATIC UTILITIES FROM HERE DOWN
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
index c413f4e23e..b9cd89e4ad 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
@@ -24,6 +24,7 @@ import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
 import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
+import org.apache.arrow.vector.util.ReusableByteArray;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Before;
@@ -286,4 +287,19 @@ public class TestFixedSizeBinaryVector {
     // Field inside a new vector created by reusing a field should be the same 
in memory as the original field.
     assertSame(fromVector.getField(), toVector.getField());
   }
+
+  /**
+   * Reads every value back through a single {@link ReusableByteArray} and checks its content.
+   */
+  @Test
+  public void testGetBytesRepeatedly() {
+    // populate the vector
+    for (int idx = 0; idx < numValues; idx++) {
+      vector.set(idx, values[idx]);
+    }
+    vector.setValueCount(numValues);
+
+    // read each slot into the same holder and verify results
+    final ReusableByteArray holder = new ReusableByteArray();
+    for (int idx = 0; idx < numValues; idx++) {
+      vector.read(idx, holder);
+      final byte[] actual = holder.getBuffer();
+      assertArrayEquals(values[idx], actual);
+    }
+  }
 }
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
index ce7bb15bb1..ecababde8d 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
@@ -17,14 +17,18 @@
 
 package org.apache.arrow.vector;
 
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Arrays;
+
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.apache.arrow.vector.util.ReusableByteArray;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Before;
@@ -56,21 +60,20 @@ public class TestLargeVarBinaryVector {
       binHolder.isSet = 1;
 
       String str = "hello";
-      ArrowBuf buf = allocator.buffer(16);
-      buf.setBytes(0, str.getBytes());
-
-      binHolder.start = 0;
-      binHolder.end = str.length();
-      binHolder.buffer = buf;
+      try (ArrowBuf buf = allocator.buffer(16)) {
+        buf.setBytes(0, str.getBytes());
 
-      vector.set(0, nullHolder);
-      vector.set(1, binHolder);
+        binHolder.start = 0;
+        binHolder.end = str.length();
+        binHolder.buffer = buf;
 
-      // verify results
-      assertTrue(vector.isNull(0));
-      assertEquals(str, new String(vector.get(1)));
+        vector.set(0, nullHolder);
+        vector.set(1, binHolder);
 
-      buf.close();
+        // verify results
+        assertTrue(vector.isNull(0));
+        assertEquals(str, new String(vector.get(1)));
+      }
     }
   }
 
@@ -86,21 +89,46 @@ public class TestLargeVarBinaryVector {
       binHolder.isSet = 1;
 
       String str = "hello world";
-      ArrowBuf buf = allocator.buffer(16);
-      buf.setBytes(0, str.getBytes());
+      try (ArrowBuf buf = allocator.buffer(16)) {
+        buf.setBytes(0, str.getBytes());
 
-      binHolder.start = 0;
-      binHolder.end = str.length();
-      binHolder.buffer = buf;
+        binHolder.start = 0;
+        binHolder.end = str.length();
+        binHolder.buffer = buf;
 
-      vector.setSafe(0, binHolder);
-      vector.setSafe(1, nullHolder);
+        vector.setSafe(0, binHolder);
+        vector.setSafe(1, nullHolder);
 
-      // verify results
-      assertEquals(str, new String(vector.get(0)));
-      assertTrue(vector.isNull(1));
+        // verify results
+        assertEquals(str, new String(vector.get(0)));
+        assertTrue(vector.isNull(1));
+      }
+    }
+  }
 
-      buf.close();
+  /**
+   * Reads two values, the second shorter than the first, into one {@link ReusableByteArray} and
+   * checks both the content and that the backing array is reused.
+   */
+  @Test
+  public void testGetBytesRepeatedly() {
+    try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) {
+      vector.allocateNew(5, 1);
+
+      final byte[] longer = "hello world".getBytes();
+      final byte[] shorter = "foo".getBytes();
+      vector.setSafe(0, longer);
+      vector.setSafe(1, shorter);
+
+      // verify results
+      final ReusableByteArray holder = new ReusableByteArray();
+      vector.read(0, holder);
+      final byte[] firstBacking = holder.getBuffer();
+      byte[] actual = Arrays.copyOfRange(holder.getBuffer(), 0, (int) holder.getLength());
+      assertArrayEquals(longer, actual);
+
+      vector.read(1, holder);
+      actual = Arrays.copyOfRange(holder.getBuffer(), 0, (int) holder.getLength());
+      assertArrayEquals(shorter, actual);
+
+      // There should not have been any reallocation since the newer value is smaller in length.
+      assertSame(firstBacking, holder.getBuffer());
     }
   }
 
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
index 5f7863c6f6..7d074c3936 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
@@ -38,6 +38,7 @@ import 
org.apache.arrow.vector.testing.ValueVectorDataPopulator;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.Text;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Assert;
@@ -795,6 +796,26 @@ public class TestLargeVarCharVector {
     }
   }
 
+  /**
+   * Reads two values through the same {@link Text} instance and checks each one.
+   *
+   * <p>{@code Text.getBytes()} hands back the backing array, which can be longer than the value
+   * it currently holds, so each check compares a copy trimmed to {@code getLength()} bytes.
+   */
+  @Test
+  public void testGetTextRepeatedly() {
+    try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+
+      ValueVectorDataPopulator.setVector(vector, STR1, STR2);
+      vector.setValueCount(2);
+
+      /* check the vector output */
+      Text text = new Text();
+      vector.read(0, text);
+      byte[] result = new byte[(int) text.getLength()];
+      System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength());
+      assertArrayEquals(STR1, result);
+
+      vector.read(1, text);
+      result = new byte[(int) text.getLength()];
+      System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength());
+      // Assert on the trimmed copy: the raw backing array only matches when its capacity
+      // happens to equal the value length.
+      assertArrayEquals(STR2, result);
+    }
+  }
+
   @Test
   public void testGetTransferPairWithField() {
     try (BufferAllocator childAllocator1 = 
allocator.newChildAllocator("child1", 1000000, 1000000);
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 0928d3eb03..fb96870804 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -25,6 +25,7 @@ import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 
 import java.nio.ByteBuffer;
@@ -56,6 +57,7 @@ import org.apache.arrow.vector.holders.NullableUInt4Holder;
 import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
 import org.apache.arrow.vector.holders.NullableVarCharHolder;
 import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -63,6 +65,7 @@ import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.ReusableByteArray;
 import org.apache.arrow.vector.util.Text;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
@@ -1107,6 +1110,22 @@ public class TestValueVector {
     }
   }
 
+  /**
+   * Reads two values through the same {@link Text} instance and checks each one.
+   *
+   * <p>{@code Text.getBytes()} hands back the backing array, which can be longer than the value
+   * it currently holds, so only the first {@code getLength()} bytes are compared.
+   */
+  @Test
+  public void testGetTextRepeatedly() {
+    try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+
+      ValueVectorDataPopulator.setVector(vector, STR1, STR2);
+      vector.setValueCount(2);
+
+      /* check the vector output */
+      Text text = new Text();
+      vector.read(0, text);
+      assertArrayEquals(STR1, Arrays.copyOfRange(text.getBytes(), 0, (int) text.getLength()));
+
+      // Trim to the valid prefix so the check does not depend on the reused array's capacity.
+      vector.read(1, text);
+      assertArrayEquals(STR2, Arrays.copyOfRange(text.getBytes(), 0, (int) text.getLength()));
+    }
+  }
+
   @Test /* VarBinaryVector */
   public void testNullableVarType2() {
 
@@ -1156,6 +1175,31 @@ public class TestValueVector {
     }
   }
 
+  /**
+   * Reads two values, the second shorter than the first, into one {@link ReusableByteArray} and
+   * checks both the content and that the backing array is reused.
+   */
+  @Test
+  public void testGetBytesRepeatedly() {
+    try (VarBinaryVector vector = new VarBinaryVector("", allocator)) {
+      vector.allocateNew(5, 1);
+
+      final byte[] longer = "hello world".getBytes();
+      final byte[] shorter = "foo".getBytes();
+      vector.setSafe(0, longer);
+      vector.setSafe(1, shorter);
+
+      // verify results
+      final ReusableByteArray holder = new ReusableByteArray();
+      vector.read(0, holder);
+      final byte[] firstBacking = holder.getBuffer();
+      byte[] actual = Arrays.copyOfRange(firstBacking, 0, (int) holder.getLength());
+      assertArrayEquals(longer, actual);
+
+      vector.read(1, holder);
+      actual = Arrays.copyOfRange(holder.getBuffer(), 0, (int) holder.getLength());
+      assertArrayEquals(shorter, actual);
+
+      // There should not have been any reallocation since the newer value is smaller in length.
+      assertSame(firstBacking, holder.getBuffer());
+    }
+  }
 
   /*
    * generic tests
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
new file mode 100644
index 0000000000..b11aa5638d
--- /dev/null
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Base64;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseValueVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@link ReusableByteArray}: content, length, string form, equality and hash code
+ * after repeated {@code set} calls, plus when the backing array is or is not reallocated.
+ */
+public class TestReusableByteArray {
+
+  private BufferAllocator allocator;
+
+  @Before
+  public void prepare() {
+    // Permit allocating 4 vectors of max size.
+    allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE);
+  }
+
+  @After
+  public void shutdown() {
+    allocator.close();
+  }
+
+  @Test
+  public void testSetByteArrayRepeatedly() {
+    ReusableByteArray byteArray = new ReusableByteArray();
+    try (ArrowBuf workingBuf = allocator.buffer(100)) {
+      setAndVerify(byteArray, workingBuf, "test");
+
+      // Test a longer string. Should require reallocation.
+      byte[] oldBuffer = byteArray.getBuffer();
+      workingBuf.clear();
+      setAndVerify(byteArray, workingBuf, "test_longer");
+
+      // Verify reallocation needed.
+      assertNotSame(oldBuffer, byteArray.getBuffer());
+      assertTrue(byteArray.getBuffer().length > oldBuffer.length);
+
+      // Test writing a shorter string. Should not require reallocation.
+      oldBuffer = byteArray.getBuffer();
+      workingBuf.clear();
+      setAndVerify(byteArray, workingBuf, "short");
+
+      // Verify reallocation was not needed.
+      assertSame(oldBuffer, byteArray.getBuffer());
+    }
+  }
+
+  /**
+   * Writes {@code str} at the start of {@code workingBuf}, loads it into {@code byteArray}, and
+   * asserts that length, content, string form, equality and hash code all reflect the new value.
+   *
+   * @param byteArray  the reusable array under test
+   * @param workingBuf scratch buffer the value is staged in
+   * @param str        the value to store and verify
+   */
+  private static void setAndVerify(ReusableByteArray byteArray, ArrowBuf workingBuf, String str) {
+    final byte[] expected = str.getBytes();
+    workingBuf.setBytes(0, expected);
+    byteArray.set(workingBuf, 0, expected.length);
+
+    assertEquals(expected.length, byteArray.getLength());
+    // Only the first getLength() bytes of the backing array belong to the current value.
+    assertArrayEquals(expected,
+        Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength()));
+    assertEquals(Base64.getEncoder().encodeToString(expected), byteArray.toString());
+    assertEquals(new ReusableByteArray(expected), byteArray);
+    assertEquals(new ReusableByteArray(expected).hashCode(), byteArray.hashCode());
+  }
+}

Reply via email to