This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6f8f34bd34 GH-38254: [Java] Add reusable buffer getters to char/binary
vectors (#38266)
6f8f34bd34 is described below
commit 6f8f34bd344c5cdf158aacb0215c3409d2996c5e
Author: James Duong <[email protected]>
AuthorDate: Mon Oct 23 09:45:26 2023 -0700
GH-38254: [Java] Add reusable buffer getters to char/binary vectors (#38266)
### Rationale for this change
Provide a way for a user to reuse a buffer when iterating over
byte-array-based ValueVectors to avoid excessive
reallocations.
### What changes are included in this PR?
Add a reusable buffer interface that can be populated by character and
binary vectors to avoid allocations when consuming vector content.
Optimize getObject() on VarCharVector/LargeVarCharVector to avoid an extra
allocation of a byte array (copy from ArrowBuf directly to the resulting Text).
### Are these changes tested?
### Are there any user-facing changes?
Yes.
**This PR includes breaking changes to public APIs.**
* Closes: #38254
Authored-by: James Duong <[email protected]>
Signed-off-by: David Li <[email protected]>
---
.../text/ArrowFlightJdbcVarCharVectorAccessor.java | 2 +-
.../org/apache/arrow/memory/ReusableBuffer.java | 47 ++++++++
.../arrow/vector/BaseLargeVariableWidthVector.java | 33 +++---
.../arrow/vector/BaseVariableWidthVector.java | 27 +++--
.../apache/arrow/vector/FixedSizeBinaryVector.java | 13 +++
.../apache/arrow/vector/LargeVarBinaryVector.java | 23 +++-
.../apache/arrow/vector/LargeVarCharVector.java | 39 +++++--
.../org/apache/arrow/vector/VarBinaryVector.java | 19 ++-
.../org/apache/arrow/vector/VarCharVector.java | 33 ++++--
.../arrow/vector/util/ReusableByteArray.java | 129 +++++++++++++++++++++
.../java/org/apache/arrow/vector/util/Text.java | 78 +------------
.../arrow/vector/TestFixedSizeBinaryVector.java | 16 +++
.../arrow/vector/TestLargeVarBinaryVector.java | 74 ++++++++----
.../arrow/vector/TestLargeVarCharVector.java | 21 ++++
.../org/apache/arrow/vector/TestValueVector.java | 44 +++++++
.../arrow/vector/util/TestReusableByteArray.java | 97 ++++++++++++++++
16 files changed, 540 insertions(+), 155 deletions(-)
diff --git
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
index aad8d9094c..d4075bbb75 100644
---
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
+++
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java
@@ -200,7 +200,7 @@ public class ArrowFlightJdbcVarCharVectorAccessor extends
ArrowFlightJdbcAccesso
// Already in UTF-8
final Text textValue = new Text(value);
- return new ByteArrayInputStream(textValue.getBytes(), 0,
textValue.getLength());
+ return new ByteArrayInputStream(textValue.getBytes(), 0, (int)
textValue.getLength());
}
@Override
diff --git
a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java
new file mode 100644
index 0000000000..3530b819aa
--- /dev/null
+++
b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.memory;
+
+/**
+ * A lightweight, automatically expanding container for holding byte data.
+ * @param <T> The type of the underlying buffer.
+ */
+public interface ReusableBuffer<T> {
+ /**
+ * Get the number of valid bytes in the data.
+ *
+ * @return the number of valid bytes in the data
+ */
+ long getLength();
+
+ /**
+ * Get the buffer backing this ReusableBuffer.
+ */
+ T getBuffer();
+
+ /**
+ * Set the buffer to the contents of the given ArrowBuf.
+ * The internal buffer must resize if it cannot fit the contents
+ * of the data.
+ *
+ * @param srcBytes the data to copy from
+ * @param start the first position of the new data
+ * @param len the number of bytes of the new data
+ */
+ void set(ArrowBuf srcBytes, long start, long len);
+}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
index db922d6a70..fcac28bd08 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
@@ -180,8 +180,8 @@ public abstract class BaseLargeVariableWidthVector extends
BaseValueVector
if (valueCount == 0) {
return 0.0D;
}
- final long startOffset = offsetBuffer.getLong(0);
- final long endOffset = offsetBuffer.getLong((long) valueCount *
OFFSET_WIDTH);
+ final long startOffset = getStartOffset(0);
+ final long endOffset = getStartOffset(valueCount);
final double totalListSize = endOffset - startOffset;
return totalListSize / valueCount;
}
@@ -570,7 +570,7 @@ public abstract class BaseLargeVariableWidthVector extends
BaseValueVector
if (valueCount == 0) {
return 0;
}
- return capAtMaxInt(offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH));
+ return capAtMaxInt(getStartOffset(valueCount));
}
/**
@@ -598,7 +598,7 @@ public abstract class BaseLargeVariableWidthVector extends
BaseValueVector
final long validityBufferSize = getValidityBufferSizeFromCount(valueCount);
final long offsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
/* get the end offset for this valueCount */
- final long dataBufferSize = offsetBuffer.getLong((long) valueCount *
OFFSET_WIDTH);
+ final long dataBufferSize = getStartOffset(valueCount);
return capAtMaxInt(validityBufferSize + offsetBufferSize + dataBufferSize);
}
@@ -702,7 +702,7 @@ public abstract class BaseLargeVariableWidthVector extends
BaseValueVector
public abstract TransferPair getTransferPair(Field field, BufferAllocator
allocator);
/**
- * Transfer this vector'data to another vector. The memory associated
+ * Transfer this vector's data to another vector. The memory associated
* with this vector is transferred to the allocator of target vector
* for accounting and management purposes.
* @param target destination vector for transfer
@@ -752,12 +752,12 @@ public abstract class BaseLargeVariableWidthVector
extends BaseValueVector
* in the target vector.
*/
private void splitAndTransferOffsetBuffer(int startIndex, int length,
BaseLargeVariableWidthVector target) {
- final long start = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH);
- final long end = offsetBuffer.getLong((long) (startIndex + length) *
OFFSET_WIDTH);
+ final long start = getStartOffset(startIndex);
+ final long end = getStartOffset(startIndex + length);
final long dataLength = end - start;
target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
for (int i = 0; i < length + 1; i++) {
- final long relativeSourceOffset = offsetBuffer.getLong((long)
(startIndex + i) * OFFSET_WIDTH) - start;
+ final long relativeSourceOffset = getStartOffset(startIndex + i) - start;
target.offsetBuffer.setLong((long) i * OFFSET_WIDTH,
relativeSourceOffset);
}
final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength);
@@ -973,8 +973,7 @@ public abstract class BaseLargeVariableWidthVector extends
BaseValueVector
return 0;
}
final long startOffset = getStartOffset(index);
- final int dataLength =
- (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) -
startOffset);
+ final int dataLength = (int) (getEndOffset(index) - startOffset);
return dataLength;
}
@@ -1320,7 +1319,7 @@ public abstract class BaseLargeVariableWidthVector
extends BaseValueVector
final long length = end - start;
fillHoles(thisIndex);
BitVectorHelper.setBit(this.validityBuffer, thisIndex);
- final long copyStart = offsetBuffer.getLong((long) thisIndex *
OFFSET_WIDTH);
+ final long copyStart = getStartOffset(thisIndex);
from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, (int)
length);
offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart +
length);
}
@@ -1352,7 +1351,7 @@ public abstract class BaseLargeVariableWidthVector
extends BaseValueVector
handleSafe(thisIndex, length);
fillHoles(thisIndex);
BitVectorHelper.setBit(this.validityBuffer, thisIndex);
- final long copyStart = offsetBuffer.getLong((long) thisIndex *
OFFSET_WIDTH);
+ final long copyStart = getStartOffset(thisIndex);
from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart,
length);
offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart +
length);
}
@@ -1369,8 +1368,8 @@ public abstract class BaseLargeVariableWidthVector
extends BaseValueVector
if (isNull(index)) {
reuse.set(null, 0, 0);
} else {
- long offset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
- int length = (int) (offsetBuffer.getLong((long) (index + 1) *
OFFSET_WIDTH) - offset);
+ long offset = getStartOffset(index);
+ int length = (int) (getEndOffset(index) - offset);
reuse.set(valueBuffer, offset, length);
}
return reuse;
@@ -1387,7 +1386,7 @@ public abstract class BaseLargeVariableWidthVector
extends BaseValueVector
return ArrowBufPointer.NULL_HASH_CODE;
}
final long start = getStartOffset(index);
- final long end = getStartOffset(index + 1);
+ final long end = getEndOffset(index);
return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
}
@@ -1395,4 +1394,8 @@ public abstract class BaseLargeVariableWidthVector
extends BaseValueVector
public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
return visitor.visit(this, value);
}
+
+ protected final long getEndOffset(int index) {
+ return offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ }
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index b57dd93438..a0a5e085a5 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -199,8 +199,8 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
if (valueCount == 0) {
return 0.0D;
}
- final int startOffset = offsetBuffer.getInt(0);
- final int endOffset = offsetBuffer.getInt((long) valueCount *
OFFSET_WIDTH);
+ final int startOffset = getStartOffset(0);
+ final int endOffset = getStartOffset(valueCount);
final double totalListSize = endOffset - startOffset;
return totalListSize / valueCount;
}
@@ -791,8 +791,8 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
* in the target vector.
*/
private void splitAndTransferOffsetBuffer(int startIndex, int length,
BaseVariableWidthVector target) {
- final int start = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH);
- final int end = offsetBuffer.getInt((long) (startIndex + length) *
OFFSET_WIDTH);
+ final int start = getStartOffset(startIndex);
+ final int end = getStartOffset(startIndex + length);
final int dataLength = end - start;
if (start == 0) {
@@ -801,7 +801,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
} else {
target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
for (int i = 0; i < length + 1; i++) {
- final int relativeSourceOffset = offsetBuffer.getInt((long)
(startIndex + i) * OFFSET_WIDTH) - start;
+ final int relativeSourceOffset = getStartOffset(startIndex + i) -
start;
target.offsetBuffer.setInt((long) i * OFFSET_WIDTH,
relativeSourceOffset);
}
}
@@ -1032,8 +1032,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
return 0;
}
final int startOffset = getStartOffset(index);
- final int dataLength =
- offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) - startOffset;
+ final int dataLength = getEndOffset(index) - startOffset;
return dataLength;
}
@@ -1238,7 +1237,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
handleSafe(index, length);
fillHoles(index);
BitVectorHelper.setBit(validityBuffer, index);
- final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ final int startOffset = getStartOffset(index);
offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset +
length);
final ArrowBuf bb = buffer.slice(start, length);
valueBuffer.setBytes(startOffset, bb);
@@ -1375,7 +1374,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
final int length = end - start;
fillHoles(thisIndex);
BitVectorHelper.setBit(this.validityBuffer, thisIndex);
- final int copyStart = offsetBuffer.getInt((long) thisIndex *
OFFSET_WIDTH);
+ final int copyStart = getStartOffset(thisIndex);
from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart,
length);
offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart +
length);
}
@@ -1398,7 +1397,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
handleSafe(thisIndex, 0);
fillHoles(thisIndex);
BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
- final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH);
+ final int copyStart = getStartOffset(thisIndex);
offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
} else {
final int start = from.getOffsetBuffer().getInt((long) fromIndex *
OFFSET_WIDTH);
@@ -1407,7 +1406,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
handleSafe(thisIndex, length);
fillHoles(thisIndex);
BitVectorHelper.setBit(this.validityBuffer, thisIndex);
- final int copyStart = offsetBuffer.getInt((long) thisIndex *
OFFSET_WIDTH);
+ final int copyStart = getStartOffset(thisIndex);
from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart,
length);
offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart +
length);
}
@@ -1424,8 +1423,8 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
if (isNull(index)) {
reuse.set(null, 0, 0);
} else {
- int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
- int length = offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) -
offset;
+ int offset = getStartOffset(index);
+ int length = getEndOffset(index) - offset;
reuse.set(valueBuffer, offset, length);
}
return reuse;
@@ -1442,7 +1441,7 @@ public abstract class BaseVariableWidthVector extends
BaseValueVector
return ArrowBufPointer.NULL_HASH_CODE;
}
final int start = getStartOffset(index);
- final int end = getStartOffset(index + 1);
+ final int end = getEndOffset(index);
return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
index 967d560d78..52c57e2234 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
@@ -21,6 +21,7 @@ import static
org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.complex.impl.FixedSizeBinaryReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
@@ -116,6 +117,18 @@ public class FixedSizeBinaryVector extends
BaseFixedWidthVector {
return dst;
}
+ /**
+ * Read the value at the given position to the given output buffer.
+ * The caller is responsible for checking for nullity first.
+ *
+ * @param index position of element.
+ * @param buffer the buffer to write into.
+ */
+ public void read(int index, ReusableBuffer<?> buffer) {
+ final int startOffset = index * byteWidth;
+ buffer.set(valueBuffer, startOffset, byteWidth);
+ }
+
/**
* Get the element at the given index from the vector and
* sets the state in holder. If element at given index
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
index 6806b958da..0750f68f4f 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
@@ -18,6 +18,7 @@
package org.apache.arrow.vector;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
import org.apache.arrow.vector.complex.impl.LargeVarBinaryReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holders.LargeVarBinaryHolder;
@@ -105,13 +106,25 @@ public final class LargeVarBinaryVector extends
BaseLargeVariableWidthVector {
return null;
}
final long startOffset = getStartOffset(index);
- final int dataLength =
- (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) -
startOffset);
- final byte[] result = new byte[dataLength];
- valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ final long dataLength = getEndOffset(index) - startOffset;
+ final byte[] result = new byte[(int) dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, (int) dataLength);
return result;
}
+ /**
+ * Read the value at the given position to the given output buffer.
+ * The caller is responsible for checking for nullity first.
+ *
+ * @param index position of element.
+ * @param buffer the buffer to write into.
+ */
+ public void read(int index, ReusableBuffer<?> buffer) {
+ final long startOffset = getStartOffset(index);
+ final long dataLength = getEndOffset(index) - startOffset;
+ buffer.set(valueBuffer, startOffset, dataLength);
+ }
+
/**
* Get the variable length element at specified index as Text.
*
@@ -137,7 +150,7 @@ public final class LargeVarBinaryVector extends
BaseLargeVariableWidthVector {
}
holder.isSet = 1;
holder.start = getStartOffset(index);
- holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ holder.end = getEndOffset(index);
holder.buffer = valueBuffer;
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
index 874079a0ef..6f08fcb81f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
@@ -17,7 +17,10 @@
package org.apache.arrow.vector;
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
import org.apache.arrow.vector.complex.impl.LargeVarCharReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holders.LargeVarCharHolder;
@@ -106,10 +109,9 @@ public final class LargeVarCharVector extends
BaseLargeVariableWidthVector {
return null;
}
final long startOffset = getStartOffset(index);
- final int dataLength =
- (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) -
startOffset);
- final byte[] result = new byte[dataLength];
- valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ final long dataLength = getEndOffset(index) - startOffset;
+ final byte[] result = new byte[(int) dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, (int) dataLength);
return result;
}
@@ -120,12 +122,27 @@ public final class LargeVarCharVector extends
BaseLargeVariableWidthVector {
* @return Text object for non-null element, null otherwise
*/
public Text getObject(int index) {
- byte[] b = get(index);
- if (b == null) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
return null;
- } else {
- return new Text(b);
}
+
+ final Text result = new Text();
+ read(index, result);
+ return result;
+ }
+
+ /**
+ * Read the value at the given position to the given output buffer.
+ * The caller is responsible for checking for nullity first.
+ *
+ * @param index position of element.
+ * @param buffer the buffer to write into.
+ */
+ public void read(int index, ReusableBuffer<?> buffer) {
+ final long startOffset = getStartOffset(index);
+ final long dataLength = getEndOffset(index) - startOffset;
+ buffer.set(valueBuffer, startOffset, dataLength);
}
/**
@@ -143,7 +160,7 @@ public final class LargeVarCharVector extends
BaseLargeVariableWidthVector {
}
holder.isSet = 1;
holder.start = getStartOffset(index);
- holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ holder.end = getEndOffset(index);
holder.buffer = valueBuffer;
}
@@ -247,7 +264,7 @@ public final class LargeVarCharVector extends
BaseLargeVariableWidthVector {
* @param text Text object with data
*/
public void set(int index, Text text) {
- set(index, text.getBytes(), 0, text.getLength());
+ set(index, text.getBytes(), 0, (int) text.getLength());
}
/**
@@ -259,7 +276,7 @@ public final class LargeVarCharVector extends
BaseLargeVariableWidthVector {
* @param text Text object with data
*/
public void setSafe(int index, Text text) {
- setSafe(index, text.getBytes(), 0, text.getLength());
+ setSafe(index, text.getBytes(), 0, (int) text.getLength());
}
@Override
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
index b43cd33d05..87790c1168 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
@@ -20,6 +20,7 @@ package org.apache.arrow.vector;
import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
@@ -106,13 +107,25 @@ public final class VarBinaryVector extends
BaseVariableWidthVector {
return null;
}
final int startOffset = getStartOffset(index);
- final int dataLength =
- offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) -
startOffset;
+ final int dataLength = getEndOffset(index) - startOffset;
final byte[] result = new byte[dataLength];
valueBuffer.getBytes(startOffset, result, 0, dataLength);
return result;
}
+ /**
+ * Read the value at the given position to the given output buffer.
+ * The caller is responsible for checking for nullity first.
+ *
+ * @param index position of element.
+ * @param buffer the buffer to write into.
+ */
+ public void read(int index, ReusableBuffer<?> buffer) {
+ final int startOffset = getStartOffset(index);
+ final int dataLength = getEndOffset(index) - startOffset;
+ buffer.set(valueBuffer, startOffset, dataLength);
+ }
+
/**
* Get the variable length element at specified index as Text.
*
@@ -138,7 +151,7 @@ public final class VarBinaryVector extends
BaseVariableWidthVector {
}
holder.isSet = 1;
holder.start = getStartOffset(index);
- holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ holder.end = getEndOffset(index);
holder.buffer = valueBuffer;
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
index 9ac275f75a..7350dc99bb 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
@@ -20,6 +20,7 @@ package org.apache.arrow.vector;
import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
import org.apache.arrow.vector.complex.impl.VarCharReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
@@ -105,8 +106,7 @@ public final class VarCharVector extends
BaseVariableWidthVector {
return null;
}
final int startOffset = getStartOffset(index);
- final int dataLength =
- offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) -
startOffset;
+ final int dataLength = getEndOffset(index) - startOffset;
final byte[] result = new byte[dataLength];
valueBuffer.getBytes(startOffset, result, 0, dataLength);
return result;
@@ -119,12 +119,27 @@ public final class VarCharVector extends
BaseVariableWidthVector {
* @return Text object for non-null element, null otherwise
*/
public Text getObject(int index) {
- byte[] b = get(index);
- if (b == null) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
return null;
- } else {
- return new Text(b);
}
+
+ final Text result = new Text();
+ read(index, result);
+ return result;
+ }
+
+ /**
+ * Read the value at the given position to the given output buffer.
+ * The caller is responsible for checking for nullity first.
+ *
+ * @param index position of element.
+ * @param buffer the buffer to write into.
+ */
+ public void read(int index, ReusableBuffer<?> buffer) {
+ final int startOffset = getStartOffset(index);
+ final int dataLength = getEndOffset(index) - startOffset;
+ buffer.set(valueBuffer, startOffset, dataLength);
}
/**
@@ -142,7 +157,7 @@ public final class VarCharVector extends
BaseVariableWidthVector {
}
holder.isSet = 1;
holder.start = getStartOffset(index);
- holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ holder.end = getEndOffset(index);
holder.buffer = valueBuffer;
}
@@ -247,7 +262,7 @@ public final class VarCharVector extends
BaseVariableWidthVector {
* @param text Text object with data
*/
public void set(int index, Text text) {
- set(index, text.getBytes(), 0, text.getLength());
+ set(index, text.getBytes(), 0, (int) text.getLength());
}
/**
@@ -259,7 +274,7 @@ public final class VarCharVector extends
BaseVariableWidthVector {
* @param text Text object with data
*/
public void setSafe(int index, Text text) {
- setSafe(index, text.getBytes(), 0, text.getLength());
+ setSafe(index, text.getBytes(), 0, (int) text.getLength());
}
@Override
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
new file mode 100644
index 0000000000..d938cd833a
--- /dev/null
+++
b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Arrays;
+import java.util.Base64;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.ReusableBuffer;
+
+/**
+ * A wrapper around byte arrays for repeated writing.
+ */
+public class ReusableByteArray implements ReusableBuffer<byte[]> {
+
+ protected static final byte[] EMPTY_BYTES = new byte[0];
+
+ protected byte[] bytes;
+ protected int length;
+
+ public ReusableByteArray() {
+ bytes = EMPTY_BYTES;
+ }
+
+ public ReusableByteArray(byte[] data) {
+ bytes = Arrays.copyOfRange(data, 0, data.length);
+ length = data.length;
+ }
+
+ /**
+ * Get the number of bytes in the byte array.
+ *
+ * @return the number of bytes in the byte array
+ */
+ @Override
+ public long getLength() {
+ return length;
+ }
+
+ @Override
+ public byte[] getBuffer() {
+ return bytes;
+ }
+
+ @Override
+ public void set(ArrowBuf srcBytes, long start, long len) {
+ setCapacity((int) len, false);
+ srcBytes.getBytes(start, bytes, 0, (int) len);
+ length = (int) len;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ } else if (o == null) {
+ return false;
+ }
+ if (!(o instanceof ReusableByteArray)) {
+ return false;
+ }
+
+ final ReusableByteArray that = (ReusableByteArray) o;
+ if (this.getLength() != that.getLength()) {
+ return false;
+ }
+
+ for (int i = 0; i < length; i++) {
+ if (bytes[i] != that.bytes[i]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ if (bytes == null) {
+ return 0;
+ }
+
+ int result = 1;
+ for (int i = 0; i < length; i++) {
+ result = 31 * result + bytes[i];
+ }
+
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return Base64.getEncoder().encodeToString(Arrays.copyOfRange(bytes, 0,
length));
+ }
+
+ /**
+ * Sets the capacity of this object to <em>at least</em> <code>len</code>
bytes. If the
+ * current buffer is longer, then the capacity and existing content of the
buffer are unchanged.
+ * If <code>len</code> is larger than the current capacity, the Text
object's capacity is
+ * increased to match.
+ *
+ * @param len the number of bytes we need
+ * @param keepData should the old data be kept
+ */
+ protected void setCapacity(int len, boolean keepData) {
+ if (bytes == null || bytes.length < len) {
+ if (bytes != null && keepData) {
+ bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
+ } else {
+ bytes = new byte[len];
+ }
+ }
+ }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
index 778af0ca95..5f5f5d3bd6 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
@@ -29,7 +29,6 @@ import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
-import java.util.Arrays;
import java.util.Optional;
import com.fasterxml.jackson.core.JsonGenerationException;
@@ -43,7 +42,7 @@ import com.fasterxml.jackson.databind.ser.std.StdSerializer;
* Lifted from Hadoop 2.7.1
*/
@JsonSerialize(using = Text.TextSerializer.class)
-public class Text {
+public class Text extends ReusableByteArray {
private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
new ThreadLocal<CharsetEncoder>() {
@@ -65,13 +64,9 @@ public class Text {
}
};
- private static final byte[] EMPTY_BYTES = new byte[0];
-
- private byte[] bytes;
- private int length;
public Text() {
- bytes = EMPTY_BYTES;
+ super();
}
/**
@@ -123,15 +118,6 @@ public class Text {
return bytes;
}
- /**
- * Get the number of bytes in the byte array.
- *
- * @return the number of bytes in the byte array
- */
- public int getLength() {
- return length;
- }
-
/**
* Returns the Unicode Scalar Value (32-bit integer value) for the character
at
* <code>position</code>. Note that this method avoids using the converter
or doing String
@@ -238,7 +224,7 @@ public class Text {
* @param other the text to initialize from
*/
public void set(Text other) {
- set(other.getBytes(), 0, other.getLength());
+ set(other.getBytes(), 0, (int) other.getLength());
}
/**
@@ -278,25 +264,6 @@ public class Text {
length = 0;
}
- /**
- * Sets the capacity of this Text object to <em>at least</em>
<code>len</code> bytes. If the
- * current buffer is longer, then the capacity and existing content of the
buffer are unchanged.
- * If <code>len</code> is larger than the current capacity, the Text
object's capacity is
- * increased to match.
- *
- * @param len the number of bytes we need
- * @param keepData should the old data be kept
- */
- private void setCapacity(int len, boolean keepData) {
- if (bytes == null || bytes.length < len) {
- if (bytes != null && keepData) {
- bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
- } else {
- bytes = new byte[len];
- }
- }
- }
-
@Override
public String toString() {
try {
@@ -322,47 +289,10 @@ public class Text {
@Override
public boolean equals(Object o) {
- if (o == this) {
- return true;
- } else if (o == null) {
- return false;
- }
if (!(o instanceof Text)) {
return false;
}
-
- final Text that = (Text) o;
- if (this.getLength() != that.getLength()) {
- return false;
- }
-
- // copied from Arrays.equals so we don'thave to copy the byte arrays
- for (int i = 0; i < length; i++) {
- if (bytes[i] != that.bytes[i]) {
- return false;
- }
- }
-
- return true;
- }
-
- /**
- * Copied from Arrays.hashCode so we don't have to copy the byte array.
- *
- * @return hashCode
- */
- @Override
- public int hashCode() {
- if (bytes == null) {
- return 0;
- }
-
- int result = 1;
- for (int i = 0; i < length; i++) {
- result = 31 * result + bytes[i];
- }
-
- return result;
+ return super.equals(o);
}
// / STATIC UTILITIES FROM HERE DOWN
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
index c413f4e23e..b9cd89e4ad 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
@@ -24,6 +24,7 @@ import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
+import org.apache.arrow.vector.util.ReusableByteArray;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.After;
import org.junit.Before;
@@ -286,4 +287,19 @@ public class TestFixedSizeBinaryVector {
// Field inside a new vector created by reusing a field should be the same
in memory as the original field.
assertSame(fromVector.getField(), toVector.getField());
}
+
+ @Test
+ public void testGetBytesRepeatedly() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, values[i]);
+ }
+ vector.setValueCount(numValues);
+
+ ReusableByteArray reusableByteArray = new ReusableByteArray();
+ for (int i = 0; i < numValues; i++) {
+ // verify results
+ vector.read(i, reusableByteArray);
+ assertArrayEquals(values[i], reusableByteArray.getBuffer());
+ }
+ }
}
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
index ce7bb15bb1..ecababde8d 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
@@ -17,14 +17,18 @@
package org.apache.arrow.vector;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
+import java.util.Arrays;
+
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.apache.arrow.vector.util.ReusableByteArray;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.After;
import org.junit.Before;
@@ -56,21 +60,20 @@ public class TestLargeVarBinaryVector {
binHolder.isSet = 1;
String str = "hello";
- ArrowBuf buf = allocator.buffer(16);
- buf.setBytes(0, str.getBytes());
-
- binHolder.start = 0;
- binHolder.end = str.length();
- binHolder.buffer = buf;
+ try (ArrowBuf buf = allocator.buffer(16)) {
+ buf.setBytes(0, str.getBytes());
- vector.set(0, nullHolder);
- vector.set(1, binHolder);
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
- // verify results
- assertTrue(vector.isNull(0));
- assertEquals(str, new String(vector.get(1)));
+ vector.set(0, nullHolder);
+ vector.set(1, binHolder);
- buf.close();
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+ }
}
}
@@ -86,21 +89,46 @@ public class TestLargeVarBinaryVector {
binHolder.isSet = 1;
String str = "hello world";
- ArrowBuf buf = allocator.buffer(16);
- buf.setBytes(0, str.getBytes());
+ try (ArrowBuf buf = allocator.buffer(16)) {
+ buf.setBytes(0, str.getBytes());
- binHolder.start = 0;
- binHolder.end = str.length();
- binHolder.buffer = buf;
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
- vector.setSafe(0, binHolder);
- vector.setSafe(1, nullHolder);
+ vector.setSafe(0, binHolder);
+ vector.setSafe(1, nullHolder);
- // verify results
- assertEquals(str, new String(vector.get(0)));
- assertTrue(vector.isNull(1));
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+ }
+ }
+ }
- buf.close();
+ @Test
+ public void testGetBytesRepeatedly() {
+ try (LargeVarBinaryVector vector = new LargeVarBinaryVector("",
allocator)) {
+ vector.allocateNew(5, 1);
+
+ final String str = "hello world";
+ final String str2 = "foo";
+ vector.setSafe(0, str.getBytes());
+ vector.setSafe(1, str2.getBytes());
+
+ // verify results
+ ReusableByteArray reusableByteArray = new ReusableByteArray();
+ vector.read(0, reusableByteArray);
+ byte[] oldBuffer = reusableByteArray.getBuffer();
+ assertArrayEquals(str.getBytes(),
Arrays.copyOfRange(reusableByteArray.getBuffer(),
+ 0, (int) reusableByteArray.getLength()));
+
+ vector.read(1, reusableByteArray);
+ assertArrayEquals(str2.getBytes(),
Arrays.copyOfRange(reusableByteArray.getBuffer(),
+ 0, (int) reusableByteArray.getLength()));
+
+ // There should not have been any reallocation since the newer value is
smaller in length.
+ assertSame(oldBuffer, reusableByteArray.getBuffer());
}
}
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
index 5f7863c6f6..7d074c3936 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
@@ -38,6 +38,7 @@ import
org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.After;
import org.junit.Assert;
@@ -795,6 +796,26 @@ public class TestLargeVarCharVector {
}
}
+ @Test
+ public void testGetTextRepeatedly() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector",
allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector, STR1, STR2);
+ vector.setValueCount(2);
+
+ /* reuse one Text for both reads; compare only its first getLength() bytes */
+ Text text = new Text();
+ vector.read(0, text);
+ byte[] result = new byte[(int) text.getLength()];
+ System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength());
+ assertArrayEquals(STR1, result);
+ vector.read(1, text);
+ result = new byte[(int) text.getLength()];
+ System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength());
+ assertArrayEquals(STR2, result);
+ }
+ }
+
@Test
public void testGetTransferPairWithField() {
try (BufferAllocator childAllocator1 =
allocator.newChildAllocator("child1", 1000000, 1000000);
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
index 0928d3eb03..fb96870804 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -25,6 +25,7 @@ import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import java.nio.ByteBuffer;
@@ -56,6 +57,7 @@ import org.apache.arrow.vector.holders.NullableUInt4Holder;
import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -63,6 +65,7 @@ import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.ReusableByteArray;
import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.After;
@@ -1107,6 +1110,22 @@ public class TestValueVector {
}
}
+ @Test
+ public void testGetTextRepeatedly() {
+ try (final VarCharVector vector = new VarCharVector("myvector",
allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector, STR1, STR2);
+ vector.setValueCount(2);
+
+ /* reuse one Text for both reads; compare only its first getLength() bytes */
+ Text text = new Text();
+ vector.read(0, text);
+ assertArrayEquals(STR1, Arrays.copyOfRange(text.getBytes(), 0, (int) text.getLength()));
+ vector.read(1, text);
+ assertArrayEquals(STR2, Arrays.copyOfRange(text.getBytes(), 0, (int) text.getLength()));
+ }
+ }
+
@Test /* VarBinaryVector */
public void testNullableVarType2() {
@@ -1156,6 +1175,31 @@ public class TestValueVector {
}
}
+ @Test
+ public void testGetBytesRepeatedly() {
+ try (VarBinaryVector vector = new VarBinaryVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ final String str = "hello world";
+ final String str2 = "foo";
+ vector.setSafe(0, str.getBytes());
+ vector.setSafe(1, str2.getBytes());
+
+ // verify results
+ ReusableByteArray reusableByteArray = new ReusableByteArray();
+ vector.read(0, reusableByteArray);
+ assertArrayEquals(str.getBytes(),
Arrays.copyOfRange(reusableByteArray.getBuffer(),
+ 0, (int) reusableByteArray.getLength()));
+ byte[] oldBuffer = reusableByteArray.getBuffer();
+
+ vector.read(1, reusableByteArray);
+ assertArrayEquals(str2.getBytes(),
Arrays.copyOfRange(reusableByteArray.getBuffer(),
+ 0, (int) reusableByteArray.getLength()));
+
+ // There should not have been any reallocation since the newer value is
smaller in length.
+ assertSame(oldBuffer, reusableByteArray.getBuffer());
+ }
+ }
/*
* generic tests
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
new file mode 100644
index 0000000000..b11aa5638d
--- /dev/null
+++
b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Base64;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseValueVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestReusableByteArray {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ // Permit allocating 4 vectors of max size.
+ allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSetByteArrayRepeatedly() {
+ ReusableByteArray byteArray = new ReusableByteArray();
+ try (ArrowBuf workingBuf = allocator.buffer(100)) {
+ final String str = "test";
+ workingBuf.setBytes(0, str.getBytes());
+ byteArray.set(workingBuf, 0, str.getBytes().length);
+ assertEquals(str.getBytes().length, byteArray.getLength());
+ assertArrayEquals(str.getBytes(),
Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength()));
+ assertEquals(Base64.getEncoder().encodeToString(str.getBytes()),
byteArray.toString());
+ assertEquals(new ReusableByteArray(str.getBytes()), byteArray);
+ assertEquals(new ReusableByteArray(str.getBytes()).hashCode(),
byteArray.hashCode());
+
+ // Test a longer string. Should require reallocation.
+ final String str2 = "test_longer";
+ byte[] oldBuffer = byteArray.getBuffer();
+ workingBuf.clear();
+ workingBuf.setBytes(0, str2.getBytes());
+ byteArray.set(workingBuf, 0, str2.getBytes().length);
+ assertEquals(str2.getBytes().length, byteArray.getLength());
+ assertArrayEquals(str2.getBytes(),
Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength()));
+ assertEquals(Base64.getEncoder().encodeToString(str2.getBytes()),
byteArray.toString());
+ assertEquals(new ReusableByteArray(str2.getBytes()), byteArray);
+ assertEquals(new ReusableByteArray(str2.getBytes()).hashCode(),
byteArray.hashCode());
+
+ // Verify reallocation needed.
+ assertNotSame(oldBuffer, byteArray.getBuffer());
+ assertTrue(byteArray.getBuffer().length > oldBuffer.length);
+
+ // Test writing a shorter string. Should not require reallocation.
+ final String str3 = "short";
+ oldBuffer = byteArray.getBuffer();
+ workingBuf.clear();
+ workingBuf.setBytes(0, str3.getBytes());
+ byteArray.set(workingBuf, 0, str3.getBytes().length);
+ assertEquals(str3.getBytes().length, byteArray.getLength());
+ assertArrayEquals(str3.getBytes(),
Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength()));
+ assertEquals(Base64.getEncoder().encodeToString(str3.getBytes()),
byteArray.toString());
+ assertEquals(new ReusableByteArray(str3.getBytes()), byteArray);
+ assertEquals(new ReusableByteArray(str3.getBytes()).hashCode(),
byteArray.hashCode());
+
+ // Verify reallocation was not needed.
+ assertSame(oldBuffer, byteArray.getBuffer());
+ }
+ }
+}