[ 
https://issues.apache.org/jira/browse/ARROW-1962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16313889#comment-16313889
 ] 

ASF GitHub Bot commented on ARROW-1962:
---------------------------------------

BryanCutler closed pull request #1455: ARROW-1962: [Java] Adding reset to 
ValueVector interface
URL: https://github.com/apache/arrow/pull/1455
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/java/vector/src/main/codegen/templates/UnionVector.java 
b/java/vector/src/main/codegen/templates/UnionVector.java
index 9377bd0da..aa8178a92 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -29,7 +29,6 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
-import org.apache.arrow.vector.BaseDataValueVector;
 import org.apache.arrow.vector.complex.impl.ComplexCopier;
 import org.apache.arrow.vector.util.CallBack;
 import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
@@ -318,6 +317,13 @@ public void clear() {
     internalMap.clear();
   }
 
+  @Override
+  public void reset() {
+    valueCount = 0;
+    typeBuffer.setZero(0, typeBuffer.capacity());
+    internalMap.reset();
+  }
+
   @Override
   public Field getField() {
     List<org.apache.arrow.vector.types.pojo.Field> childFields = new 
ArrayList<>();
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
deleted file mode 100644
index 80675135e..000000000
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.arrow.vector;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
-
-import io.netty.buffer.ArrowBuf;
-import org.apache.arrow.vector.util.CallBack;
-import org.apache.arrow.vector.util.TransferPair;
-
-
-public abstract class BaseDataValueVector extends BaseValueVector implements 
BufferBacked {
-
-  public static void load(ArrowFieldNode fieldNode, List<BufferBacked> 
vectors, List<ArrowBuf> buffers) {
-    int expectedSize = vectors.size();
-    if (buffers.size() != expectedSize) {
-      throw new IllegalArgumentException("Illegal buffer count, expected " + 
expectedSize + ", got: " + buffers.size());
-    }
-    for (int i = 0; i < expectedSize; i++) {
-      vectors.get(i).load(fieldNode, buffers.get(i));
-    }
-  }
-
-  public static void truncateBufferBasedOnSize(List<ArrowBuf> buffers, int 
bufferIndex, int byteSize) {
-    if (bufferIndex >= buffers.size()) {
-      throw new IllegalArgumentException("no buffer at index " + bufferIndex + 
": " + buffers);
-    }
-    ArrowBuf buffer = buffers.get(bufferIndex);
-    if (buffer.writerIndex() < byteSize) {
-      throw new IllegalArgumentException("can not truncate buffer to a larger 
size " + byteSize + ": " + buffer.writerIndex());
-    }
-    buffer.writerIndex(byteSize);
-  }
-
-  public static List<ArrowBuf> unload(List<BufferBacked> vectors) {
-    List<ArrowBuf> result = new ArrayList<>(vectors.size());
-    for (BufferBacked vector : vectors) {
-      result.add(vector.unLoad());
-    }
-    return result;
-  }
-
-  protected ArrowBuf data;
-
-  public BaseDataValueVector(String name, BufferAllocator allocator) {
-    super(name, allocator);
-    data = allocator.getEmpty();
-  }
-
-  @Override
-  public void clear() {
-    data.release();
-    data = allocator.getEmpty();
-    super.clear();
-  }
-
-  @Override
-  public TransferPair getTransferPair(String ref, BufferAllocator allocator, 
CallBack callBack) {
-    return getTransferPair(ref, allocator);
-  }
-
-  @Override
-  public ArrowBuf[] getBuffers(boolean clear) {
-    ArrowBuf[] out;
-    if (getBufferSize() == 0) {
-      out = new ArrowBuf[0];
-    } else {
-      out = new ArrowBuf[]{data};
-      data.readerIndex(0);
-      if (clear) {
-        data.retain(1);
-      }
-    }
-    if (clear) {
-      clear();
-    }
-    return out;
-  }
-
-  @Override
-  public int getBufferSize() {
-    if (getValueCount() == 0) {
-      return 0;
-    }
-    return data.writerIndex();
-  }
-
-  public ArrowBuf getBuffer() {
-    return data;
-  }
-
-  @Override
-  public void load(ArrowFieldNode fieldNode, ArrowBuf data) {
-    this.data.release();
-    this.data = data.retain(allocator);
-  }
-
-  @Override
-  public ArrowBuf unLoad() {
-    return this.data.readerIndex(0);
-  }
-
-  /**
-   * This method has a similar effect of allocateNew() without actually 
clearing and reallocating
-   * the value vector. The purpose is to move the value vector to a "mutate" 
state
-   */
-  public void reset() {
-  }
-}
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
index 77026d4e5..702db9f52 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -208,7 +208,9 @@ private void initValueBuffer() {
    * Reset the vector to initial state. Same as {@link #zeroVector()}.
    * Note that this method doesn't release any memory.
    */
+  @Override
   public void reset() {
+    valueCount = 0;
     zeroVector();
   }
 
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index fbadb35d7..fff329a9b 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -193,6 +193,7 @@ private int getOffsetBufferValueCapacity() {
   public void zeroVector() {
     initValidityBuffer();
     initOffsetBuffer();
+    valueBuffer.setZero(0, valueBuffer.capacity());
   }
 
   /* zero out the validity buffer */
@@ -212,6 +213,7 @@ private void initOffsetBuffer() {
   public void reset() {
     zeroVector();
     lastSet = -1;
+    valueCount = 0;
   }
 
   /**
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
index e77c1b127..24cf59a0d 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
@@ -100,10 +100,19 @@
   void close();
 
   /**
-   * Release the underlying ArrowBuf and reset the ValueVector to empty.
+   * Release any owned ArrowBuf and reset the ValueVector to the initial 
state. If the
+   * vector has any child vectors, they will also be cleared.
    */
   void clear();
 
+  /**
+   * Reset the ValueVector to the initial state without releasing any owned 
ArrowBuf.
+   * Buffer capacities will remain unchanged and any previous data will be 
zeroed out.
+   * This includes buffers for data, validity, offset, etc. If the vector has 
any
+   * child vectors, they will also be reset.
+   */
+  void reset();
+
   /**
    * Get information about how this field is materialized.
    *
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
index 962a1c947..2d3c543d0 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
@@ -72,6 +72,10 @@ public void close() {
   public void clear() {
   }
 
+  @Override
+  public void reset() {
+  }
+
   @Override
   public Field getField() {
     return new Field(DATA_VECTOR_NAME, FieldType.nullable(new Null()), null);
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
index 9a23fd8c3..d0a664ac0 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
@@ -186,6 +186,13 @@ public void clear() {
     super.clear();
   }
 
+  @Override
+  public void reset() {
+    offsetBuffer.setZero(0, offsetBuffer.capacity());
+    vector.reset();
+    valueCount = 0;
+  }
+
   @Override
   public ArrowBuf[] getBuffers(boolean clear) {
     final ArrowBuf[] buffers;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index 93a8127cf..9314a2566 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -271,6 +271,13 @@ public void clear() {
     super.clear();
   }
 
+  @Override
+  public void reset() {
+    validityBuffer.setZero(0, validityBuffer.capacity());
+    vector.reset();
+    valueCount = 0;
+  }
+
   @Override
   public ArrowBuf[] getBuffers(boolean clear) {
     setReaderAndWriterIndex();
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index 0c1daf490..8aeeb7e5a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -513,6 +513,13 @@ public void clear() {
     lastSet = 0;
   }
 
+  @Override
+  public void reset() {
+    super.reset();
+    validityBuffer.setZero(0, validityBuffer.capacity());
+    lastSet = 0;
+  }
+
   /**
    * Return the underlying buffers associated with this vector. Note that this 
doesn't
    * impact the reference counts for this buffer so it only should be used for 
in-context
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index e13084588..6eab6ef3d 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -302,6 +302,14 @@ public void clear() {
     valueCount = 0;
   }
 
+  @Override
+  public void reset() {
+    for (final ValueVector v : getChildren()) {
+      v.reset();
+    }
+    valueCount = 0;
+  }
+
   @Override
   public Field getField() {
     List<Field> children = new ArrayList<>();
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
index d887b7306..fb84d23b5 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
@@ -305,6 +305,15 @@ public void clear() {
     super.clear();
   }
 
+  /**
+   * Reset this vector to empty, does not release buffers
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    validityBuffer.setZero(0, validityBuffer.capacity());
+  }
+
   /**
    * Release the validity buffer
    */
@@ -493,8 +502,4 @@ public void setValueCount(int valueCount) {
     super.setValueCount(valueCount);
     this.valueCount = valueCount;
   }
-
-  public void reset() {
-    valueCount = 0;
-  }
 }
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
index 28903b117..84ea9657f 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
@@ -19,13 +19,23 @@
 package org.apache.arrow.vector;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
+import io.netty.buffer.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.*;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.nio.charset.StandardCharsets;
+
 public class TestVectorReset {
 
   private BufferAllocator allocator;
@@ -40,15 +50,100 @@ public void terminate() throws Exception {
     allocator.close();
   }
 
+  private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) {
+    int[] sizeBefore = new int[bufs.length];
+    for (int i = 0; i < bufs.length; i++) {
+      sizeBefore[i] = bufs[i].capacity();
+    }
+    vector.reset();
+    for (int i = 0; i < bufs.length; i++) {
+      assertEquals(sizeBefore[i], bufs[i].capacity());
+      verifyBufferZeroed(bufs[i]);
+    }
+    assertEquals(0, vector.getValueCount());
+  }
+
+  private void verifyBufferZeroed(ArrowBuf buf) {
+    for (int i = 0; i < buf.capacity(); i++) {
+      assertTrue((byte) 0 == buf.getByte(i));
+    }
+  }
+
   @Test
   public void testFixedTypeReset() {
-    try (final UInt4Vector vector = new UInt4Vector("", allocator)) {
-      vector.allocateNew();
-      final int sizeBefore = vector.getBufferSize();
-      vector.reAlloc();
-      vector.reset();
-      final int sizeAfter = vector.getBufferSize();
-      assertEquals(sizeBefore, sizeAfter);
+    try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) {
+      vector.allocateNewSafe();
+      vector.setNull(0);
+      vector.setValueCount(1);
+      resetVectorAndVerify(vector, vector.getBuffers(false));
+    }
+  }
+
+  @Test
+  public void testVariableTypeReset() {
+    try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) 
{
+      vector.allocateNewSafe();
+      vector.set(0, "a".getBytes(StandardCharsets.UTF_8));
+      vector.setLastSet(0);
+      vector.setValueCount(1);
+      resetVectorAndVerify(vector, vector.getBuffers(false));
+      assertEquals(-1, vector.getLastSet());
+    }
+  }
+
+  @Test
+  public void testListTypeReset() {
+    try (final ListVector variableList = new ListVector("VarList", allocator, 
FieldType.nullable(MinorType.INT.getType()), null);
+         final FixedSizeListVector fixedList = new 
FixedSizeListVector("FixedList", allocator, FieldType.nullable(new 
FixedSizeList(2)), null)
+    ) {
+      // ListVector
+      variableList.allocateNewSafe();
+      variableList.startNewValue(0);
+      variableList.endValue(0, 0);
+      variableList.setValueCount(1);
+      resetVectorAndVerify(variableList, variableList.getBuffers(false));
+      assertEquals(0, variableList.getLastSet());
+
+      // FixedSizeListVector
+      fixedList.allocateNewSafe();
+      fixedList.setNull(0);
+      fixedList.setValueCount(1);
+      resetVectorAndVerify(fixedList, fixedList.getBuffers(false));
+    }
+  }
+
+  @Test
+  public void testMapTypeReset() {
+    try (final MapVector mapVector = new MapVector("Map", allocator, 
FieldType.nullable(MinorType.INT.getType()), null);
+         final NullableMapVector nullableMapVector = new 
NullableMapVector("NullableMap", allocator, 
FieldType.nullable(MinorType.INT.getType()), null)
+    ) {
+      // MapVector
+      mapVector.allocateNewSafe();
+      IntVector mapChild = mapVector.addOrGet("child", FieldType.nullable(new 
Int(32, true)), IntVector.class);
+      mapChild.setNull(0);
+      mapVector.setValueCount(1);
+      resetVectorAndVerify(mapVector, mapVector.getBuffers(false));
+
+      // NullableMapVector
+      nullableMapVector.allocateNewSafe();
+      nullableMapVector.setNull(0);
+      nullableMapVector.setValueCount(1);
+      resetVectorAndVerify(nullableMapVector, 
nullableMapVector.getBuffers(false));
+    }
+  }
+
+  @Test
+  public void testUnionTypeReset() {
+    try (final UnionVector vector = new UnionVector("Union", allocator, null);
+         final IntVector dataVector = new IntVector("Int", allocator)
+    ) {
+      vector.getBufferSize();
+      vector.allocateNewSafe();
+      dataVector.allocateNewSafe();
+      vector.addVector(dataVector);
+      dataVector.setNull(0);
+      vector.setValueCount(1);
+      resetVectorAndVerify(vector, vector.getBuffers(false));
     }
   }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [Java] Add reset() to ValueVector interface
> -------------------------------------------
>
>                 Key: ARROW-1962
>                 URL: https://issues.apache.org/jira/browse/ARROW-1962
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: Java - Vectors
>            Reporter: Bryan Cutler
>            Assignee: Bryan Cutler
>              Labels: pull-request-available
>             Fix For: 0.9.0
>
>
> The {{reset()}} method exists in some ValueVectors but not all.  Its meaning 
> is that it will bring the vector to an empty state, but not release any 
> buffers (as opposed to clear() which resets and releases buffers).
> It should be added to the {{ValueVector}} interface and implemented in the 
> vector hierarchy where it currently is not. 



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to