This is an automated email from the ASF dual-hosted git repository.

cutlerb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 936115c  ARROW-2594: [Java] When realloc Vectors, zero out all unfilled bytes of new buffer
936115c is described below

commit 936115c5f0141ee9ef358150e5a7c716387537d0
Author: Bryan Cutler <cutl...@gmail.com>
AuthorDate: Thu May 17 11:20:45 2018 -0700

    ARROW-2594: [Java] When realloc Vectors, zero out all unfilled bytes of new buffer
    
    Currently, when reallocating vectors, only the second half of the new
    buffer is zeroed out, on the assumption that the new buffer is double the
    size of the previous one and that the first half is already populated or
    cleaned.  This isn't the case if the vector had been cleared and the
    buffer is empty, which results in incorrect values in the new buffer if it
    was recycled from an old one.
    
    Added a new test with a ListVector that should reuse a previous buffer 
after being cleared.
    
    Author: Bryan Cutler <cutl...@gmail.com>
    
    Closes #2054 from BryanCutler/java-vector-realloc-clear-buffer-ARROW-2594 
and squashes the following commits:
    
    28b8095 <Bryan Cutler> added a comment about clear
    be3ee8f <Bryan Cutler> remove extra spaces
    5a39790 <Bryan Cutler> zero out any newly allocated buffer bytes
---
 .../src/main/codegen/templates/UnionVector.java    |  3 +-
 .../apache/arrow/vector/BaseFixedWidthVector.java  |  3 +-
 .../arrow/vector/BaseVariableWidthVector.java      |  3 +-
 .../vector/complex/BaseRepeatedValueVector.java    |  3 +-
 .../arrow/vector/complex/FixedSizeListVector.java  |  2 +-
 .../apache/arrow/vector/complex/ListVector.java    |  3 +-
 .../apache/arrow/vector/complex/StructVector.java  |  2 +-
 .../org/apache/arrow/vector/TestListVector.java    | 45 ++++++++++++++++++++++
 8 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/java/vector/src/main/codegen/templates/UnionVector.java 
b/java/vector/src/main/codegen/templates/UnionVector.java
index 1cfa066..8b27f39 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -290,8 +290,7 @@ public class UnionVector implements FieldVector {
 
     final ArrowBuf newBuf = allocator.buffer((int)newAllocationSize);
     newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity);
-    final int halfNewCapacity = newBuf.capacity() / 2;
-    newBuf.setZero(halfNewCapacity, halfNewCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     typeBuffer.release(1);
     typeBuffer = newBuf;
     typeBufferAllocationSizeInBytes = (int)newAllocationSize;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
index b275ab2..5c45636 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -450,8 +450,7 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
 
     final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
     newBuf.setBytes(0, buffer, 0, currentBufferCapacity);
-    final int halfNewCapacity = newBuf.capacity() / 2;
-    newBuf.setZero(halfNewCapacity, halfNewCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     buffer.release(1);
     buffer = newBuf;
     if (dataBuffer) {
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index 5437056..53e123c 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -549,8 +549,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
 
     final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
     newBuf.setBytes(0, buffer, 0, currentBufferCapacity);
-    final int halfNewCapacity = newBuf.capacity() / 2;
-    newBuf.setZero(halfNewCapacity, halfNewCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     buffer.release(1);
     buffer = newBuf;
     if (offsetBuffer) {
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
index 2dd2894..bd4f7aa 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
@@ -117,8 +117,7 @@ public abstract class BaseRepeatedValueVector extends 
BaseValueVector implements
 
     final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
     newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
-    final int halfNewCapacity = newBuf.capacity() / 2;
-    newBuf.setZero(halfNewCapacity, halfNewCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     offsetBuffer.release(1);
     offsetBuffer = newBuf;
     offsetAllocationSizeInBytes = (int) newAllocationSize;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index eadbab4..f863bb6 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -217,8 +217,8 @@ public class FixedSizeListVector extends BaseValueVector 
implements FieldVector,
     }
 
     final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
-    newBuf.setZero(0, newBuf.capacity());
     newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     validityBuffer.release(1);
     validityBuffer = newBuf;
     validityAllocationSizeInBytes = (int) newAllocationSize;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index d3eeaf2..d34d68c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -301,8 +301,7 @@ public class ListVector extends BaseRepeatedValueVector 
implements FieldVector,
 
     final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
     newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
-    final int halfNewCapacity = newBuf.capacity() / 2;
-    newBuf.setZero(halfNewCapacity, halfNewCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     validityBuffer.release(1);
     validityBuffer = newBuf;
     validityAllocationSizeInBytes = (int) newAllocationSize;
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
index 05571bb..6938ea6 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
@@ -414,8 +414,8 @@ public class StructVector extends NonNullableStructVector 
implements FieldVector
     }
 
     final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
-    newBuf.setZero(0, newBuf.capacity());
     newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+    newBuf.setZero(currentBufferCapacity, newBuf.capacity() - 
currentBufferCapacity);
     validityBuffer.release(1);
     validityBuffer = newBuf;
     validityAllocationSizeInBytes = (int) newAllocationSize;
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index aea7caf..fbac551 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -827,4 +827,49 @@ public class TestListVector {
       assertEquals(1, vector.getDataVector().getValueCapacity());
     }
   }
+
+  @Test
+  public void testClearAndReuse() {
+    try (final ListVector vector = ListVector.empty("list", allocator)) {
+      BigIntVector bigIntVector = (BigIntVector) 
vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector();
+      vector.setInitialCapacity(10);
+      vector.allocateNew();
+
+      vector.startNewValue(0);
+      bigIntVector.setSafe(0, 7);
+      vector.endValue(0, 1);
+      vector.startNewValue(1);
+      bigIntVector.setSafe(1, 8);
+      vector.endValue(1, 1);
+      vector.setValueCount(2);
+
+      Object result = vector.getObject(0);
+      ArrayList<Long> resultSet = (ArrayList<Long>) result;
+      assertEquals(new Long(7), resultSet.get(0));
+
+      result = vector.getObject(1);
+      resultSet = (ArrayList<Long>) result;
+      assertEquals(new Long(8), resultSet.get(0));
+
+      // Clear and release the buffers to trigger a realloc when adding next 
value
+      vector.clear();
+
+      // The list vector should reuse a buffer when reallocating the offset 
buffer
+      vector.startNewValue(0);
+      bigIntVector.setSafe(0, 7);
+      vector.endValue(0, 1);
+      vector.startNewValue(1);
+      bigIntVector.setSafe(1, 8);
+      vector.endValue(1, 1);
+      vector.setValueCount(2);
+
+      result = vector.getObject(0);
+      resultSet = (ArrayList<Long>) result;
+      assertEquals(new Long(7), resultSet.get(0));
+
+      result = vector.getObject(1);
+      resultSet = (ArrayList<Long>) result;
+      assertEquals(new Long(8), resultSet.get(0));
+    }
+  }
 }

-- 
To stop receiving notification emails like this one, please contact
cutl...@apache.org.

Reply via email to