This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-java.git


The following commit(s) were added to refs/heads/main by this push:
     new db8b6aab GH-48: Implement VectorAppender for BaseVariableWidthViewVector (#454)
db8b6aab is described below

commit db8b6aabd26d6526f10fed780a42791c755cac23
Author: ViggoC <[email protected]>
AuthorDate: Wed Jan 22 14:27:49 2025 +0800

    GH-48: Implement VectorAppender for BaseVariableWidthViewVector (#454)
    
    Fixes #48.
---
 .../apache/arrow/vector/util/VectorAppender.java   |  65 +++++++++++-
 .../java/org/apache/arrow/vector/TestUtils.java    |  10 ++
 .../arrow/vector/TestVariableWidthViewVector.java  |  41 +++-----
 .../vector/testing/ValueVectorDataPopulator.java   |  13 +++
 .../arrow/vector/util/TestVectorAppender.java      | 117 +++++++++++++++++++++
 5 files changed, 218 insertions(+), 28 deletions(-)

diff --git a/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
index e703571b..0dc96a4d 100644
--- a/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
+++ b/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
@@ -19,6 +19,8 @@ package org.apache.arrow.vector.util;
 import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
 
 import java.util.HashSet;
+import java.util.List;
+import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.util.MemoryUtil;
 import org.apache.arrow.util.Preconditions;
 import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -91,7 +93,6 @@ public class VectorAppender implements VectorVisitor<ValueVector, Void> {
           deltaVector.getDataBuffer(),
           deltaVector.getValueCount(),
           targetVector.getDataBuffer());
-
     } else {
       MemoryUtil.copyMemory(
           deltaVector.getDataBuffer().memoryAddress(),
@@ -247,8 +248,66 @@ public class VectorAppender implements VectorVisitor<ValueVector, Void> {
   }
 
   @Override
-  public ValueVector visit(BaseVariableWidthViewVector left, Void value) {
-    throw new UnsupportedOperationException("View vectors are not supported.");
+  public ValueVector visit(BaseVariableWidthViewVector deltaVector, Void value) {
+    Preconditions.checkArgument(
+        typeVisitor.equals(deltaVector),
+        "The targetVector to append must have the same type as the targetVector being appended");
+
+    if (deltaVector.getValueCount() == 0) {
+      return targetVector; // nothing to append, return
+    }
+
+    int oldTargetValueCount = targetVector.getValueCount();
+    int newValueCount = oldTargetValueCount + deltaVector.getValueCount();
+
+    // make sure there is enough capacity
+    while (targetVector.getValueCapacity() < newValueCount) {
+      // Do not call BaseVariableWidthViewVector#reAlloc() here,
+      // because reallocViewDataBuffer() is always unnecessary
+      ((BaseVariableWidthViewVector) targetVector).reallocValidityBuffer();
+      ((BaseVariableWidthViewVector) targetVector).reallocViewBuffer();
+    }
+
+    // append validity buffer
+    BitVectorHelper.concatBits(
+        targetVector.getValidityBuffer(),
+        oldTargetValueCount,
+        deltaVector.getValidityBuffer(),
+        deltaVector.getValueCount(),
+        targetVector.getValidityBuffer());
+
+    // append data buffers
+    BaseVariableWidthViewVector targetViewVector = (BaseVariableWidthViewVector) targetVector;
+    List<ArrowBuf> targetDataBuffers = targetViewVector.getDataBuffers();
+    final int oldTargetDataBufferCount = targetDataBuffers.size();
+    List<ArrowBuf> deltaVectorDataBuffers = deltaVector.getDataBuffers();
+    deltaVectorDataBuffers.forEach(buf -> buf.getReferenceManager().retain());
+    targetDataBuffers.addAll(deltaVectorDataBuffers);
+
+    // append view buffer
+    ArrowBuf targetViewBuffer = targetVector.getDataBuffer();
+    MemoryUtil.copyMemory(
+        deltaVector.getDataBuffer().memoryAddress(),
+        targetViewBuffer.memoryAddress()
+            + (long) BaseVariableWidthViewVector.ELEMENT_SIZE * oldTargetValueCount,
+        (long) BaseVariableWidthViewVector.ELEMENT_SIZE * deltaVector.getValueCount());
+
+    // update view buffer
+    for (int i = oldTargetValueCount; i < newValueCount; i++) {
+      if (targetViewVector.isSet(i) > 0
+          && targetViewVector.getValueLength(i) > BaseVariableWidthViewVector.INLINE_SIZE) {
+        long start =
+            (long) i * BaseVariableWidthViewVector.ELEMENT_SIZE
+                + BaseVariableWidthViewVector.LENGTH_WIDTH
+                + BaseVariableWidthViewVector.PREFIX_WIDTH;
+        // shift buf id
+        int bufferId = targetViewBuffer.getInt(start);
+        targetViewBuffer.setInt(start, bufferId + oldTargetDataBufferCount);
+      }
+    }
+
+    targetVector.setValueCount(newValueCount);
+    return targetVector;
   }
 
   @Override
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
index 3845652a..82295f80 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
@@ -16,6 +16,7 @@
  */
 package org.apache.arrow.vector;
 
+import java.util.Random;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -52,4 +53,13 @@ public class TestUtils {
       Class<T> c, String name, MinorType type, BufferAllocator allocator) {
     return 
c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, 
allocator, null));
   }
+
+  public static String generateRandomString(int length) {
+    Random random = new Random();
+    StringBuilder sb = new StringBuilder(length);
+    for (int i = 0; i < length; i++) {
+      sb.append(random.nextInt(10)); // 0-9
+    }
+    return sb.toString();
+  }
 }
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java b/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
index a4533dba..7a3a1bae 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java
@@ -160,7 +160,7 @@ public class TestVariableWidthViewVector {
     try (final ViewVarCharVector viewVarCharVector = new 
ViewVarCharVector("myvector", allocator)) {
       viewVarCharVector.allocateNew(48, 4);
       final int valueCount = 4;
-      String str4 = generateRandomString(34);
+      String str4 = TestUtils.generateRandomString(34);
       viewVarCharVector.set(0, STR1);
       viewVarCharVector.set(1, STR2);
       viewVarCharVector.set(2, STR3);
@@ -216,7 +216,7 @@ public class TestVariableWidthViewVector {
     try (final ViewVarCharVector viewVarCharVector = new 
ViewVarCharVector("myvector", allocator)) {
       viewVarCharVector.allocateNew(48, 4);
       final int valueCount = 4;
-      String str4 = generateRandomString(35);
+      String str4 = TestUtils.generateRandomString(35);
       viewVarCharVector.set(0, STR1);
       viewVarCharVector.set(1, STR2);
       viewVarCharVector.set(2, STR3);
@@ -271,7 +271,7 @@ public class TestVariableWidthViewVector {
   public void testSetSafe() {
     try (final ViewVarCharVector viewVarCharVector = new 
ViewVarCharVector("myvector", allocator)) {
       viewVarCharVector.allocateNew(1, 1);
-      byte[] str6 = generateRandomString(40).getBytes();
+      byte[] str6 = TestUtils.generateRandomString(40).getBytes();
       final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5, 
str6);
 
       // set data to a position out of capacity index
@@ -305,8 +305,8 @@ public class TestVariableWidthViewVector {
     try (final ViewVarCharVector viewVarCharVector = new 
ViewVarCharVector("myvector", allocator)) {
       viewVarCharVector.allocateNew(128, 6);
       final int valueCount = 6;
-      String str4 = generateRandomString(35);
-      String str6 = generateRandomString(40);
+      String str4 = TestUtils.generateRandomString(35);
+      String str6 = TestUtils.generateRandomString(40);
       viewVarCharVector.set(0, STR1);
       viewVarCharVector.set(1, STR2);
       viewVarCharVector.set(2, STR3);
@@ -405,7 +405,7 @@ public class TestVariableWidthViewVector {
         setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), 
stringHolder);
       }
 
-      String longString = generateRandomString(128);
+      String longString = TestUtils.generateRandomString(128);
       setAndCheck(viewVarCharVector, 6, longString.getBytes(), stringHolder);
     }
   }
@@ -441,7 +441,7 @@ public class TestVariableWidthViewVector {
         setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
       }
 
-      String longString = generateRandomString(128);
+      String longString = TestUtils.generateRandomString(128);
       setAndCheck(viewVarBinaryVector, 6, longString.getBytes(), holder);
     }
   }
@@ -1169,7 +1169,7 @@ public class TestVariableWidthViewVector {
       vector.setValueCount(5);
 
       // overwrite index 2 with a long string
-      String longString = generateRandomString(128);
+      String longString = TestUtils.generateRandomString(128);
       byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
       // since the append-only approach is used and the remaining capacity
       // is not enough to store the new string; a new buffer will be allocated.
@@ -1373,7 +1373,7 @@ public class TestVariableWidthViewVector {
       // since a new buffer is added to the dataBuffers
       final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
       final long remainingCapacity = currentDataBuf.capacity() - 
currentDataBuf.writerIndex();
-      String longerString = generateRandomString(35);
+      String longerString = TestUtils.generateRandomString(35);
       byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
       assertTrue(remainingCapacity < longerStringBytes.length);
 
@@ -1406,7 +1406,7 @@ public class TestVariableWidthViewVector {
       // the remaining capacity is enough to store in the same data buffer
       final ArrowBuf currentDataBuf = vector.dataBuffers.get(0);
       final long remainingCapacity = currentDataBuf.capacity() - 
currentDataBuf.writerIndex();
-      String longerString = generateRandomString(24);
+      String longerString = TestUtils.generateRandomString(24);
       byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
       assertTrue(remainingCapacity > longerStringBytes.length);
 
@@ -1505,7 +1505,7 @@ public class TestVariableWidthViewVector {
       vector.setValueCount(5);
 
       // overwrite index 2 with a long string
-      String longString = generateRandomString(128);
+      String longString = TestUtils.generateRandomString(128);
       byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8);
 
       vector.setSafe(2, longStringBytes);
@@ -1671,7 +1671,7 @@ public class TestVariableWidthViewVector {
       vector.setSafe(2, STR7);
       vector.setValueCount(3);
 
-      String longerString = generateRandomString(35);
+      String longerString = TestUtils.generateRandomString(35);
       byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
 
       vector.setSafe(1, longerStringBytes);
@@ -1697,7 +1697,7 @@ public class TestVariableWidthViewVector {
       vector.setSafe(4, STR6);
       vector.setValueCount(5);
 
-      String longerString = generateRandomString(24);
+      String longerString = TestUtils.generateRandomString(24);
       byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8);
 
       vector.setSafe(2, longerStringBytes);
@@ -1869,7 +1869,7 @@ public class TestVariableWidthViewVector {
       // to avoid re-allocation. This is to test copyFrom() without 
re-allocation.
       final int numberOfValues = initialCapacity / 2 / 
ViewVarCharVector.ELEMENT_SIZE;
 
-      final String prefixString = generateRandomString(12);
+      final String prefixString = TestUtils.generateRandomString(12);
 
       for (int i = 0; i < numberOfValues; i++) {
         if (i % 3 == 0) {
@@ -1965,7 +1965,7 @@ public class TestVariableWidthViewVector {
 
       final int numberOfValues = initialCapacity / 
ViewVarCharVector.ELEMENT_SIZE;
 
-      final String prefixString = generateRandomString(12);
+      final String prefixString = TestUtils.generateRandomString(12);
 
       for (int i = 0; i < numberOfValues; i++) {
         if (i % 3 == 0) {
@@ -2746,7 +2746,7 @@ public class TestVariableWidthViewVector {
    */
   @Test
   public void testSplitAndTransferWithMultipleDataBuffers() {
-    final String str4 = generateRandomString(35);
+    final String str4 = TestUtils.generateRandomString(35);
     final byte[][] data = new byte[][] {STR1, STR2, STR3, 
str4.getBytes(StandardCharsets.UTF_8)};
     final int startIndex = 1;
     final int length = 3;
@@ -2851,13 +2851,4 @@ public class TestVariableWidthViewVector {
       }
     }
   }
-
-  private String generateRandomString(int length) {
-    Random random = new Random();
-    StringBuilder sb = new StringBuilder(length);
-    for (int i = 0; i < length; i++) {
-      sb.append(random.nextInt(10)); // 0-9
-    }
-    return sb.toString();
-  }
 }
diff --git a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
index f599dfa5..849fe6d6 100644
--- a/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
+++ b/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -60,6 +60,7 @@ import org.apache.arrow.vector.UInt8Vector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.VariableWidthFieldVector;
+import org.apache.arrow.vector.ViewVarCharVector;
 import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
 import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
 import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector;
@@ -606,6 +607,18 @@ public class ValueVectorDataPopulator {
     vector.setValueCount(length);
   }
 
+  /** Populate values for ViewVarCharVector. */
+  public static void setVector(ViewVarCharVector vector, String... values) {
+    final int length = values.length;
+    vector.allocateNewSafe();
+    for (int i = 0; i < length; i++) {
+      if (values[i] != null) {
+        vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
+      }
+    }
+    vector.setValueCount(length);
+  }
+
   /** Populate values for LargeVarCharVector. */
   public static void setVector(LargeVarCharVector vector, String... values) {
     final int length = values.length;
diff --git a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
index 19eafd1b..e1b3889d 100644
--- a/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
+++ b/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
@@ -24,16 +24,21 @@ import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthViewVector;
 import org.apache.arrow.vector.BigIntVector;
 import org.apache.arrow.vector.BitVector;
 import org.apache.arrow.vector.Float4Vector;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.TestUtils;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarCharVector;
 import org.apache.arrow.vector.compare.Range;
 import org.apache.arrow.vector.compare.RangeEqualsVisitor;
 import org.apache.arrow.vector.compare.TypeEqualsVisitor;
@@ -171,6 +176,118 @@ public class TestVectorAppender {
     }
   }
 
+  @Test
+  public void testAppendVariableWidthViewVector() {
+    final int length1 = 10;
+    final int length2 = 5;
+    try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+        ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+      target.allocateNew(5, length1);
+      delta.allocateNew(5, length2);
+
+      ValueVectorDataPopulator.setVector(
+          target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+      ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", 
null);
+
+      VectorAppender appender = new VectorAppender(target);
+      delta.accept(appender, null);
+
+      try (ViewVarCharVector expected = new ViewVarCharVector("expected", 
allocator)) {
+        expected.allocateNew();
+        ValueVectorDataPopulator.setVector(
+            expected, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", 
"a9", "a10", "a11",
+            "a12", "a13", null);
+        assertVectorsEqual(expected, target);
+      }
+    }
+  }
+
+  @Test
+  public void testAppendEmptyVariableWidthViewVector() {
+    try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+        ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+      ValueVectorDataPopulator.setVector(
+          target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+
+      VectorAppender appender = new VectorAppender(target);
+      delta.accept(appender, null);
+
+      try (ViewVarCharVector expected = new ViewVarCharVector("expected", 
allocator)) {
+        ValueVectorDataPopulator.setVector(
+            expected, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", 
"a9");
+        assertVectorsEqual(expected, target);
+      }
+    }
+  }
+
+  @Test
+  public void testAppendShortLongVariableWidthViewVector() {
+    try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+        ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+      String[] targetValues =
+          IntStream.range(-5, 5)
+              .mapToObj(
+                  i -> 
TestUtils.generateRandomString(BaseVariableWidthViewVector.INLINE_SIZE + i))
+              .toArray(String[]::new);
+      ValueVectorDataPopulator.setVector(target, targetValues);
+
+      String[] deltaValues =
+          IntStream.range(-3, 3)
+              .mapToObj(
+                  i -> 
TestUtils.generateRandomString(BaseVariableWidthViewVector.INLINE_SIZE + i))
+              .toArray(String[]::new);
+      ValueVectorDataPopulator.setVector(delta, deltaValues);
+
+      VectorAppender appender = new VectorAppender(target);
+      delta.accept(appender, null);
+
+      assertEquals(2, target.getDataBuffers().size());
+      try (ViewVarCharVector expected = new ViewVarCharVector("expected", 
allocator)) {
+        ValueVectorDataPopulator.setVector(
+            expected,
+            Stream.concat(Arrays.stream(targetValues), 
Arrays.stream(deltaValues))
+                .toArray(String[]::new));
+        assertVectorsEqual(expected, target);
+      }
+    }
+  }
+
+  @Test
+  public void testAppendLongVariableWidthViewVector() {
+    try (ViewVarCharVector target = new ViewVarCharVector("", allocator);
+        ViewVarCharVector delta = new ViewVarCharVector("", allocator)) {
+
+      String[] targetValues = randomLongViewVarCharVector(target);
+      String[] deltaValues = randomLongViewVarCharVector(delta);
+
+      VectorAppender appender = new VectorAppender(target);
+      delta.accept(appender, null);
+
+      assertEquals(4, target.getDataBuffers().size());
+      try (ViewVarCharVector expected = new ViewVarCharVector("expected", 
allocator)) {
+        ValueVectorDataPopulator.setVector(
+            expected,
+            Stream.concat(Arrays.stream(targetValues), 
Arrays.stream(deltaValues))
+                .toArray(String[]::new));
+        assertVectorsEqual(expected, target);
+      }
+    }
+  }
+
+  private static String[] randomLongViewVarCharVector(ViewVarCharVector 
target) {
+    assertEquals(0, target.getDataBuffers().size());
+    int initial = 64;
+    int stringCount = 128;
+    target.setInitialCapacity(initial);
+    String[] targetValues =
+        IntStream.range(0, stringCount)
+            .mapToObj(i -> 
TestUtils.generateRandomString(BaseVariableWidthViewVector.ELEMENT_SIZE))
+            .toArray(String[]::new);
+    ValueVectorDataPopulator.setVector(target, targetValues);
+    assertEquals(2, target.getDataBuffers().size());
+    return targetValues;
+  }
+
   @Test
   public void testAppendEmptyVariableWidthVector() {
     try (VarCharVector target = new VarCharVector("", allocator);

Reply via email to