This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5fe87a3cd1 GH-44626: [Java] fix SplitAndTransfer throws for empty 
MapVector (#44627)
5fe87a3cd1 is described below

commit 5fe87a3cd12af9c0981c66aa551b543f19f4df5c
Author: Maksim Yegorov <[email protected]>
AuthorDate: Thu Nov 7 20:15:39 2024 -0500

    GH-44626: [Java] fix SplitAndTransfer throws for empty MapVector (#44627)
    
    ### Rationale for this change
    
    Calling `splitAndTransfer` on an empty MapVector throws `java.lang.IndexOutOfBoundsException`. Details are in https://github.com/apache/arrow/issues/44626
    
    ### What changes are included in this PR?
    
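    Applies to MapVector the same fix that #41066 made for other vector types: the offset, validity, and data buffers are now split and transferred only when `length > 0`.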
    
    ### Are these changes tested?
    
    Added a unit test mimicking the scenario we observed, in which the MapVector's offset buffer capacity is 0.
    * GitHub Issue: #44626
    
    Authored-by: Maksim Yegorov 
<[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 .../org/apache/arrow/vector/complex/MapVector.java | 32 +++++++++---------
 .../apache/arrow/vector/TestSplitAndTransfer.java  | 38 ++++++++++++++++++++++
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index 5c6b9b2255..23cda8401b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -211,23 +211,25 @@ public class MapVector extends ListVector {
           startIndex,
           length,
           valueCount);
-      final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
-      final int sliceLength =
-          offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - 
startPoint;
       to.clear();
-      to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
-      /* splitAndTransfer offset buffer */
-      for (int i = 0; i < length + 1; i++) {
-        final int relativeOffset =
-            offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
-        to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
+      if (length > 0) {
+        final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
+        final int sliceLength =
+            offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - 
startPoint;
+        to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
+        /* splitAndTransfer offset buffer */
+        for (int i = 0; i < length + 1; i++) {
+          final int relativeOffset =
+              offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - 
startPoint;
+          to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
+        }
+        /* splitAndTransfer validity buffer */
+        splitAndTransferValidityBuffer(startIndex, length, to);
+        /* splitAndTransfer data buffer */
+        dataTransferPair.splitAndTransfer(startPoint, sliceLength);
+        to.lastSet = length - 1;
+        to.setValueCount(length);
       }
-      /* splitAndTransfer validity buffer */
-      splitAndTransferValidityBuffer(startIndex, length, to);
-      /* splitAndTransfer data buffer */
-      dataTransferPair.splitAndTransfer(startPoint, sliceLength);
-      to.lastSet = length - 1;
-      to.setValueCount(length);
     }
 
     /*
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
index 6aace95621..adf4eba10c 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
@@ -16,6 +16,7 @@
  */
 package org.apache.arrow.vector;
 
+import static java.util.Arrays.asList;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -23,7 +24,9 @@ import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
@@ -36,6 +39,7 @@ import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.complex.UnionVector;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.jupiter.api.AfterEach;
@@ -223,6 +227,40 @@ public class TestSplitAndTransfer {
     // no allocations to clear for ZeroVector
   }
 
+  @Test
+  public void testListVectorWithEmptyMapVector() {
+    // List<element: Map(false)<entries: Struct<key: Utf8 not null, value: 
Utf8> not null>>
+    int valueCount = 1;
+    List<Field> children = new ArrayList<>();
+    children.add(new Field("key", FieldType.notNullable(new ArrowType.Utf8()), 
null));
+    children.add(new Field("value", FieldType.nullable(new ArrowType.Utf8()), 
null));
+    Field structField =
+        new Field("entries", FieldType.notNullable(ArrowType.Struct.INSTANCE), 
children);
+
+    Field mapField =
+        new Field("element", FieldType.notNullable(new ArrowType.Map(false)), 
asList(structField));
+
+    Field listField = new Field("list", FieldType.nullable(new 
ArrowType.List()), asList(mapField));
+
+    ListVector fromListVector = (ListVector) listField.createVector(allocator);
+    fromListVector.allocateNew();
+    fromListVector.setValueCount(valueCount);
+
+    // child vector is empty
+    MapVector dataVector = (MapVector) fromListVector.getDataVector();
+    dataVector.allocateNew();
+    // unset capacity to mimic observed failure mode
+    dataVector.getOffsetBuffer().capacity(0);
+
+    TransferPair transferPair = 
fromListVector.getTransferPair(fromListVector.getAllocator());
+    transferPair.splitAndTransfer(0, valueCount);
+    ListVector toListVector = (ListVector) transferPair.getTo();
+
+    assertEquals(valueCount, toListVector.getValueCount());
+    fromListVector.clear();
+    toListVector.clear();
+  }
+
   @Test /* VarCharVector */
   public void test() throws Exception {
     try (final VarCharVector varCharVector = new VarCharVector("myvector", 
allocator)) {

Reply via email to