This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-java.git
The following commit(s) were added to refs/heads/main by this push:
new db8b6aab GH-48: Implement VectorAppender for BaseVariableWidthViewVector (#454)
db8b6aab is described below
commit db8b6aabd26d6526f10fed780a42791c755cac23
Author: ViggoC <[email protected]>
AuthorDate: Wed Jan 22 14:27:49 2025 +0800
GH-48: Implement VectorAppender for BaseVariableWidthViewVector (#454)
Fixes #48.
---
.../apache/arrow/vector/util/VectorAppender.java | 65 +++++++++++-
.../java/org/apache/arrow/vector/TestUtils.java | 10 ++
.../arrow/vector/TestVariableWidthViewVector.java | 41 +++-----
.../vector/testing/ValueVectorDataPopulator.java | 13 +++
.../arrow/vector/util/TestVectorAppender.java | 117 +++++++++++++++++++++
5 files changed, 218 insertions(+), 28 deletions(-)
diff --git a/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
index e703571b..0dc96a4d 100644
--- a/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
+++ b/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
@@ -19,6 +19,8 @@ package org.apache.arrow.vector.util;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import java.util.HashSet;
+import java.util.List;
+import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.util.MemoryUtil;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -91,7 +93,6 @@ public class VectorAppender implements VectorVisitor<ValueVector, Void> {
deltaVector.getDataBuffer(),
deltaVector.getValueCount(),
targetVector.getDataBuffer());
-
} else {
MemoryUtil.copyMemory(
deltaVector.getDataBuffer().memoryAddress(),
@@ -247,8 +248,66 @@ public class VectorAppender implements VectorVisitor<ValueVector, Void> {
}
@Override
- public ValueVector visit(BaseVariableWidthViewVector left, Void value) {
- throw new UnsupportedOperationException("View vectors are not supported.");
+ public ValueVector visit(BaseVariableWidthViewVector deltaVector, Void value) {
+ Preconditions.checkArgument(
+ typeVisitor.equals(deltaVector),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int oldTargetValueCount = targetVector.getValueCount();
+ int newValueCount = oldTargetValueCount + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ // Do not call BaseVariableWidthViewVector#reAlloc() here,
+ // because reallocViewDataBuffer() is always unnecessary
+ ((BaseVariableWidthViewVector) targetVector).reallocValidityBuffer();
+ ((BaseVariableWidthViewVector) targetVector).reallocViewBuffer();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(),
+ oldTargetValueCount,
+ deltaVector.getValidityBuffer(),
+ deltaVector.getValueCount(),
+ targetVector.getValidityBuffer());
+
+ // append data buffers
+ BaseVariableWidthViewVector targetViewVector = (BaseVariableWidthViewVector) targetVector;
+ List<ArrowBuf> targetDataBuffers = targetViewVector.getDataBuffers();
+ final int oldTargetDataBufferCount = targetDataBuffers.size();
+ List<ArrowBuf> deltaVectorDataBuffers = deltaVector.getDataBuffers();
+ deltaVectorDataBuffers.forEach(buf -> buf.getReferenceManager().retain());
+ targetDataBuffers.addAll(deltaVectorDataBuffers);
+
+ // append view buffer
+ ArrowBuf targetViewBuffer = targetVector.getDataBuffer();
+ MemoryUtil.copyMemory(
+ deltaVector.getDataBuffer().memoryAddress(),
+ targetViewBuffer.memoryAddress()
+ + (long) BaseVariableWidthViewVector.ELEMENT_SIZE * oldTargetValueCount,
+ (long) BaseVariableWidthViewVector.ELEMENT_SIZE * deltaVector.getValueCount());
+
+ // update view buffer
+ for (int i = oldTargetValueCount; i < newValueCount; i++) {
+ if (targetViewVector.isSet(i) > 0
+ && targetViewVector.getValueLength(i) > BaseVariableWidthViewVector.INLINE_SIZE) {
+ long start =
+ (long) i * BaseVariableWidthViewVector.ELEMENT_SIZE
+ + BaseVariableWidthViewVector.LENGTH_WIDTH
+ + BaseVariableWidthViewVector.PREFIX_WIDTH;
+ // shift buf id
+ int bufferId = targetViewBuffer.getInt(start);
+ targetViewBuffer.setInt(start, bufferId + oldTargetDataBufferCount);
+ }
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
}
@Override
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
index 3845652a..82295f80 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
@@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;
+import java.util.Random;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -52,4 +53,13 @@ public class TestUtils {
Class<T> c, String name, MinorType type, BufferAllocator allocator) {
return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null));
}
+
+ public static String generateRandomString(int length) {
+ Random random = new Random();
+ StringBuilder sb = new StringBuilder(length);
+ for (int i = 0; i < length; i++) {
+ sb.append(random.nextInt(10)); // 0-9
+ }
+ return sb.toString();
+ }
}
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java b/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
index a4533dba..7a3a1bae 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
@@ -160,7 +160,7 @@ public class TestVariableWidthViewVector {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(48, 4);
final int valueCount = 4;
- String str4 = generateRandomString(34);
+ String str4 = TestUtils.generateRandomString(34);
viewVarCharVector.set(0, STR1);
viewVarCharVector.set(1, STR2);
viewVarCharVector.set(2, STR3);
@@ -216,7 +216,7 @@ public class TestVariableWidthViewVector {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(48, 4);
final int valueCount = 4;
- String str4 = generateRandomString(35);
+ String str4 = TestUtils.generateRandomString(35);
viewVarCharVector.set(0, STR1);
viewVarCharVector.set(1, STR2);
viewVarCharVector.set(2, STR3);
@@ -271,7 +271,7 @@ public class TestVariableWidthViewVector {
public void testSetSafe() {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(1, 1);
- byte[] str6 = generateRandomString(40).getBytes();
+ byte[] str6 = TestUtils.generateRandomString(40).getBytes();
final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5, str6);
// set data to a position out of capacity index
@@ -305,8 +305,8 @@ public class TestVariableWidthViewVector {
try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
viewVarCharVector.allocateNew(128, 6);
final int valueCount = 6;
- String str4 = generateRandomString(35);
- String str6 = generateRandomString(40);
+ String str4 = TestUtils.generateRandomString(35);
+ String str6 = TestUtils.generateRandomString(40);
viewVarCharVector.set(0, STR1);
viewVarCharVector.set(1, STR2);
viewVarCharVector.set(2, STR3);
@@ -405,7 +405,7 @@ public class TestVariableWidthViewVector {
setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), stringHolder);
}
- String longString = generateRandomString(128);
+ String longString = TestUtils.generateRandomString(128);
setAndCheck(viewVarCharVector, 6, longString.getBytes(), stringHolder);
}
}
@@ -441,7 +441,7 @@ public class TestVariableWidthViewVector {
setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
}
- String longString = generateRandomString(128);
+ String longString = TestUtils.generateRandomString(128);
setAndCheck(viewVarBinaryVector, 6, longString.getBytes(), holder);
}
}
@@ -1169,7 +1169,7 @@ public class TestVariableWidthViewVector {
vector.setValueCount(5);
// overwrite index 2 with a long string
- String longString = generateRandomString(128);
+ String longString = TestUtils.generateRandomString(128);
byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
// since the append-only approach is used and the remaining capacity
// is not enough to store the new string; a new buffer will be allocated.
@@ -1373,7 +1373,7 @@ public class TestVariableWidthViewVector {
// since a new buffer is added to the dataBuffers
final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
- String longerString = generateRandomString(35);
+ String longerString = TestUtils.generateRandomString(35);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
assertTrue(remainingCapacity < longerStringBytes.length);
@@ -1406,7 +1406,7 @@ public class TestVariableWidthViewVector {
// the remaining capacity is enough to store in the same data buffer
final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex();
- String longerString = generateRandomString(24);
+ String longerString = TestUtils.generateRandomString(24);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
assertTrue(remainingCapacity > longerStringBytes.length);
@@ -1505,7 +1505,7 @@ public class TestVariableWidthViewVector {
vector.setValueCount(5);
// overwrite index 2 with a long string
- String longString = generateRandomString(128);
+ String longString = TestUtils.generateRandomString(128);
byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
vector.setSafe(2, longStringBytes);
@@ -1671,7 +1671,7 @@ public class TestVariableWidthViewVector {
vector.setSafe(2, STR7);
vector.setValueCount(3);
- String longerString = generateRandomString(35);
+ String longerString = TestUtils.generateRandomString(35);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
vector.setSafe(1, longerStringBytes);
@@ -1697,7 +1697,7 @@ public class TestVariableWidthViewVector {
vector.setSafe(4, STR6);
vector.setValueCount(5);
- String longerString = generateRandomString(24);
+ String longerString = TestUtils.generateRandomString(24);
byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
vector.setSafe(2, longerStringBytes);
@@ -1869,7 +1869,7 @@ public class TestVariableWidthViewVector {
// to avoid re-allocation. This is to test copyFrom() without re-allocation.
final int numberOfValues = initialCapacity / 2 / ViewVarCharVector.ELEMENT_SIZE;
- final String prefixString = generateRandomString(12);
+ final String prefixString = TestUtils.generateRandomString(12);
for (int i = 0; i < numberOfValues; i++) {
if (i % 3 == 0) {
@@ -1965,7 +1965,7 @@ public class TestVariableWidthViewVector {
final int numberOfValues = initialCapacity / ViewVarCharVector.ELEMENT_SIZE;
- final String prefixString = generateRandomString(12);
+ final String prefixString = TestUtils.generateRandomString(12);
for (int i = 0; i < numberOfValues; i++) {
if (i % 3 == 0) {
@@ -2746,7 +2746,7 @@ public class TestVariableWidthViewVector {
*/
@Test
public void testSplitAndTransferWithMultipleDataBuffers() {
- final String str4 = generateRandomString(35);
+ final String str4 = TestUtils.generateRandomString(35);
final byte[][] data = new byte[][] {STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)};
final int startIndex = 1;
final int length = 3;
@@ -2851,13 +2851,4 @@ public class TestVariableWidthViewVector {
}
}
}
-
- private String generateRandomString(int length) {
- Random random = new Random();
- StringBuilder sb = new StringBuilder(length);
- for (int i = 0; i < length; i++) {
- sb.append(random.nextInt(10)); // 0-9
- }
- return sb.toString();
- }
}
diff --git a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
index f599dfa5..849fe6d6 100644
--- a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
+++ b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -60,6 +60,7 @@ import org.apache.arrow.vector.UInt8Vector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VariableWidthFieldVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
@@ -606,6 +607,18 @@ public class ValueVectorDataPopulator {
vector.setValueCount(length);
}
+ /** Populate values for ViewVarCharVector. */
+ public static void setVector(ViewVarCharVector vector, String... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ vector.setValueCount(length);
+ }
+
/** Populate values for LargeVarCharVector. */
public static void setVector(LargeVarCharVector vector, String... values) {
final int length = values.length;
diff --git a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
index 19eafd1b..e1b3889d 100644
--- a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
+++ b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
@@ -24,16 +24,21 @@ import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.TestUtils;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.compare.Range;
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
import org.apache.arrow.vector.compare.TypeEqualsVisitor;
@@ -171,6 +176,118 @@ public class TestVectorAppender {
}
}
+ @Test
+ public void testAppendVariableWidthViewVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+ ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+ target.allocateNew(5, length1);
+ delta.allocateNew(5, length2);
+
+ ValueVectorDataPopulator.setVector(
+ target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+ ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (ViewVarCharVector expected = new ViewVarCharVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(
+ expected, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9", "a10", "a11",
+ "a12", "a13", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyVariableWidthViewVector() {
+ try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+ ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+ ValueVectorDataPopulator.setVector(
+ target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (ViewVarCharVector expected = new ViewVarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(
+ expected, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendShortLongVariableWidthViewVector() {
+ try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+ ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+ String[] targetValues =
+ IntStream.range(-5, 5)
+ .mapToObj(
+ i -> TestUtils.generateRandomString(BaseVariableWidthViewVector.INLINE_SIZE + i))
+ .toArray(String[]::new);
+ ValueVectorDataPopulator.setVector(target, targetValues);
+
+ String[] deltaValues =
+ IntStream.range(-3, 3)
+ .mapToObj(
+ i -> TestUtils.generateRandomString(BaseVariableWidthViewVector.INLINE_SIZE + i))
+ .toArray(String[]::new);
+ ValueVectorDataPopulator.setVector(delta, deltaValues);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(2, target.getDataBuffers().size());
+ try (ViewVarCharVector expected = new ViewVarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(
+ expected,
+ Stream.concat(Arrays.stream(targetValues), Arrays.stream(deltaValues))
+ .toArray(String[]::new));
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendLongVariableWidthViewVector() {
+ try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+ ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+
+ String[] targetValues = randomLongViewVarCharVector(target);
+ String[] deltaValues = randomLongViewVarCharVector(delta);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(4, target.getDataBuffers().size());
+ try (ViewVarCharVector expected = new ViewVarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(
+ expected,
+ Stream.concat(Arrays.stream(targetValues), Arrays.stream(deltaValues))
+ .toArray(String[]::new));
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ private static String[] randomLongViewVarCharVector(ViewVarCharVector target) {
+ assertEquals(0, target.getDataBuffers().size());
+ int initial = 64;
+ int stringCount = 128;
+ target.setInitialCapacity(initial);
+ String[] targetValues =
+ IntStream.range(0, stringCount)
+ .mapToObj(i -> TestUtils.generateRandomString(BaseVariableWidthViewVector.ELEMENT_SIZE))
+ .toArray(String[]::new);
+ ValueVectorDataPopulator.setVector(target, targetValues);
+ assertEquals(2, target.getDataBuffers().size());
+ return targetValues;
+ }
+
@Test
public void testAppendEmptyVariableWidthVector() {
try (VarCharVector target = new VarCharVector("", allocator);