This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e4220e  ARROW-4635: [Java] allocateNew to use last capacity
2e4220e is described below

commit 2e4220ec19f09f2e6ebb14dc98887802ab31dd75
Author: Pindikura Ravindra <ravin...@dremio.com>
AuthorDate: Thu Feb 21 12:28:02 2019 +0530

    ARROW-4635: [Java] allocateNew to use last capacity
    
    Modified allocateNew to allocate buffers as per the last capacity. This 
fixes a regression that was
    causing oversized allocations with StructVectors.
    
    Author: Pindikura Ravindra <ravin...@dremio.com>
    
    Closes #3717 from pravindra/reallocs and squashes the following commits:
    
    dd34f591 <Pindikura Ravindra> ARROW-4635:  allocateNew to use last capacity
---
 .../apache/arrow/vector/BaseFixedWidthVector.java  | 18 ++++----
 .../arrow/vector/BaseVariableWidthVector.java      | 37 +++++++++--------
 .../java/org/apache/arrow/vector/BitVector.java    |  2 +-
 .../java/org/apache/arrow/vector/TestCopyFrom.java | 24 +++++------
 .../org/apache/arrow/vector/TestStructVector.java  | 31 ++++++++++++++
 .../org/apache/arrow/vector/TestVectorReAlloc.java | 48 ++++++++++++++++++++++
 6 files changed, 123 insertions(+), 37 deletions(-)

diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
index f3c2837..fd1055d 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -42,7 +42,7 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
         implements FixedWidthVector, FieldVector, VectorDefinitionSetter {
   private final int typeWidth;
 
-  protected int initialValueAllocation;
+  protected int lastValueCapacity;
 
   protected final Field field;
   private int allocationMonitor;
@@ -59,7 +59,7 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
     allocationMonitor = 0;
     validityBuffer = allocator.getEmpty();
     valueBuffer = allocator.getEmpty();
-    initialValueAllocation = INITIAL_VALUE_ALLOCATION;
+    lastValueCapacity = INITIAL_VALUE_ALLOCATION;
   }
 
 
@@ -151,7 +151,7 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
   @Override
   public void setInitialCapacity(int valueCount) {
     computeAndCheckBufferSize(valueCount);
-    initialValueAllocation = valueCount;
+    lastValueCapacity = valueCount;
   }
 
   /**
@@ -254,13 +254,13 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
    */
   @Override
   public boolean allocateNewSafe() {
-    computeAndCheckBufferSize(initialValueAllocation);
+    computeAndCheckBufferSize(lastValueCapacity);
 
     /* we are doing a new allocation -- release the current buffers */
     clear();
 
     try {
-      allocateBytes(initialValueAllocation);
+      allocateBytes(lastValueCapacity);
     } catch (Exception e) {
       clear();
       return false;
@@ -317,6 +317,8 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
     valueBuffer = buffers.getDataBuf();
     validityBuffer = buffers.getValidityBuf();
     zeroVector();
+
+    lastValueCapacity = getValueCapacity();
   }
 
   /**
@@ -405,8 +407,8 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
   public void reAlloc() {
     int targetValueCount = getValueCapacity() * 2;
     if (targetValueCount == 0) {
-      if (initialValueAllocation > 0) {
-        targetValueCount = initialValueAllocation * 2;
+      if (lastValueCapacity > 0) {
+        targetValueCount = lastValueCapacity * 2;
       } else {
         targetValueCount = INITIAL_VALUE_ALLOCATION * 2;
       }
@@ -425,6 +427,8 @@ public abstract class BaseFixedWidthVector extends 
BaseValueVector
     newValidityBuffer.setZero(validityBuffer.capacity(), 
newValidityBuffer.capacity() - validityBuffer.capacity());
     validityBuffer.release();
     validityBuffer = newValidityBuffer;
+
+    lastValueCapacity = getValueCapacity();
   }
 
   @Override
diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index a5b550a..9d765a4 100644
--- 
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -38,8 +38,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
         implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
   private static final int DEFAULT_RECORD_BYTE_COUNT = 8;
   private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * 
DEFAULT_RECORD_BYTE_COUNT;
-  private int initialValueAllocation;
-  private int initialValueAllocationSizeInBytes;
+  private int lastValueCapacity;
+  private int lastValueAllocationSizeInBytes;
 
   /* protected members */
   public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track 
offsets */
@@ -50,21 +50,19 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
   protected int valueCount;
   protected int lastSet;
   protected final Field field;
-  private boolean cleared;
 
   public BaseVariableWidthVector(final String name, final BufferAllocator 
allocator,
                                          FieldType fieldType) {
     super(name, allocator);
-    initialValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
+    lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
     // -1 because we require one extra slot for the offset array.
-    initialValueAllocation = INITIAL_VALUE_ALLOCATION - 1;
+    lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
     field = new Field(name, fieldType, null);
     valueCount = 0;
     lastSet = -1;
     offsetBuffer = allocator.getEmpty();
     validityBuffer = allocator.getEmpty();
     valueBuffer = allocator.getEmpty();
-    cleared = false;
   }
 
   /* TODO:
@@ -155,8 +153,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT;
     checkDataBufferSize(size);
     computeAndCheckOffsetsBufferSize(valueCount);
-    initialValueAllocationSizeInBytes = (int) size;
-    initialValueAllocation = valueCount;
+    lastValueAllocationSizeInBytes = (int) size;
+    lastValueCapacity = valueCount;
   }
 
   /**
@@ -170,8 +168,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     long size = Math.max((long)(valueCount * density), 1L);
     checkDataBufferSize(size);
     computeAndCheckOffsetsBufferSize(valueCount);
-    initialValueAllocationSizeInBytes = (int) size;
-    initialValueAllocation = valueCount;
+    lastValueAllocationSizeInBytes = (int) size;
+    lastValueCapacity = valueCount;
   }
 
   /**
@@ -251,7 +249,6 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     validityBuffer = releaseBuffer(validityBuffer);
     valueBuffer = releaseBuffer(valueBuffer);
     offsetBuffer = releaseBuffer(offsetBuffer);
-    cleared = true;
     lastSet = -1;
     valueCount = 0;
   }
@@ -362,14 +359,14 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
    */
   @Override
   public boolean allocateNewSafe() {
-    checkDataBufferSize(initialValueAllocationSizeInBytes);
-    computeAndCheckOffsetsBufferSize(initialValueAllocation);
+    checkDataBufferSize(lastValueAllocationSizeInBytes);
+    computeAndCheckOffsetsBufferSize(lastValueCapacity);
 
     /* we are doing a new allocation -- release the current buffers */
     clear();
 
     try {
-      allocateBytes(initialValueAllocationSizeInBytes, initialValueAllocation);
+      allocateBytes(lastValueAllocationSizeInBytes, lastValueCapacity);
     } catch (Exception e) {
       clear();
       return false;
@@ -442,6 +439,9 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     validityBuffer = buffers.getValidityBuf();
     initOffsetBuffer();
     initValidityBuffer();
+
+    lastValueCapacity = getValueCapacity();
+    lastValueAllocationSizeInBytes = valueBuffer.capacity();
   }
 
   /* allocate offset buffer */
@@ -478,7 +478,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
    * @throws OutOfMemoryException if the internal memory allocation fails
    */
   public void reallocDataBuffer() {
-    long baseSize = initialValueAllocationSizeInBytes;
+    long baseSize = lastValueAllocationSizeInBytes;
     final int currentBufferCapacity = valueBuffer.capacity();
 
     if (baseSize < (long) currentBufferCapacity) {
@@ -495,6 +495,7 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity);
     valueBuffer.release();
     valueBuffer = newBuf;
+    lastValueAllocationSizeInBytes = valueBuffer.capacity();
   }
 
   /**
@@ -523,8 +524,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
   public void reallocValidityAndOffsetBuffers() {
     int targetOffsetCount = (offsetBuffer.capacity() / OFFSET_WIDTH)  * 2;
     if (targetOffsetCount == 0) {
-      if (initialValueAllocation > 0) {
-        targetOffsetCount = 2 * (initialValueAllocation + 1);
+      if (lastValueCapacity > 0) {
+        targetOffsetCount = 2 * (lastValueCapacity + 1);
       } else {
         targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1);
       }
@@ -543,6 +544,8 @@ public abstract class BaseVariableWidthVector extends 
BaseValueVector
     newValidityBuffer.setZero(validityBuffer.capacity(), 
newValidityBuffer.capacity() - validityBuffer.capacity());
     validityBuffer.release();
     validityBuffer = newValidityBuffer;
+
+    lastValueCapacity = getValueCapacity();
   }
 
   /**
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java 
b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
index c6c9642..b806ec7 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -94,7 +94,7 @@ public class BitVector extends BaseFixedWidthVector {
     if (size * 2 > MAX_ALLOCATION_SIZE) {
       throw new OversizedAllocationException("Requested amount of memory is 
more than max allowed");
     }
-    initialValueAllocation = valueCount;
+    lastValueCapacity = valueCount;
   }
 
   /**
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
index b10db95..08932ef 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
@@ -221,7 +221,7 @@ public class TestCopyFrom {
         final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) 
{
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -283,7 +283,7 @@ public class TestCopyFrom {
         final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -424,7 +424,7 @@ public class TestCopyFrom {
         final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -486,7 +486,7 @@ public class TestCopyFrom {
         final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -550,7 +550,7 @@ public class TestCopyFrom {
         final IntervalDayVector vector2 = new 
IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -618,7 +618,7 @@ public class TestCopyFrom {
         final IntervalYearVector vector2 = new 
IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -690,7 +690,7 @@ public class TestCopyFrom {
         final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -753,7 +753,7 @@ public class TestCopyFrom {
         final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -816,7 +816,7 @@ public class TestCopyFrom {
         final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -879,7 +879,7 @@ public class TestCopyFrom {
         final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, 
allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -946,7 +946,7 @@ public class TestCopyFrom {
         final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, 
allocator, 30, 16)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
@@ -1014,7 +1014,7 @@ public class TestCopyFrom {
         final TimeStampVector vector2 = new 
TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) {
 
       vector1.allocateNew();
-      assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation);
+      assertTrue(vector1.getValueCapacity() >= 
vector1.INITIAL_VALUE_ALLOCATION);
       assertEquals(0, vector1.getValueCount());
       int initialCapacity = vector1.getValueCapacity();
 
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
index 54a11a3..7706be4 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
@@ -69,4 +69,35 @@ public class TestStructVector {
       assertEquals(0, toChild.getValidityBuffer().capacity());
     }
   }
+
+  @Test
+  public void testAllocateAfterReAlloc() throws Exception {
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("k1", "v1");
+    FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+    try (StructVector vector = new StructVector("struct", allocator, type, 
null)) {
+      MinorType childtype = MinorType.INT;
+      vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), 
IntVector.class);
+
+      /*
+       * Allocate the default size, and then, reAlloc. This should double the 
allocation.
+       */
+      vector.allocateNewSafe(); // Initial allocation
+      vector.reAlloc(); // Double the allocation size of self, and all 
children.
+      int savedValidityBufferCapacity = vector.getValidityBuffer().capacity();
+      int savedValueCapacity = vector.getValueCapacity();
+
+      /*
+       * Clear and allocate again.
+       */
+      vector.clear();
+      vector.allocateNewSafe();
+
+      /*
+       * Verify that the buffer sizes haven't changed.
+       */
+      Assert.assertEquals(vector.getValidityBuffer().capacity(), 
savedValidityBufferCapacity);
+      Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+    }
+  }
 }
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
index 60747aa..8e0806d 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
@@ -142,4 +142,52 @@ public class TestVectorReAlloc {
       assertNull(vector.getObject(513));
     }
   }
+
+  @Test
+  public void testFixedAllocateAfterReAlloc() throws Exception {
+    try (final IntVector vector = new IntVector("", allocator)) {
+      /*
+       * Allocate the default size, and then, reAlloc. This should double the 
allocation.
+       */
+      vector.allocateNewSafe(); // Initial allocation
+      vector.reAlloc(); // Double the allocation size.
+      int savedValueCapacity = vector.getValueCapacity();
+
+      /*
+       * Clear and allocate again.
+       */
+      vector.clear();
+      vector.allocateNewSafe();
+
+      /*
+       * Verify that the buffer sizes haven't changed.
+       */
+      Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+    }
+  }
+
+  @Test
+  public void testVariableAllocateAfterReAlloc() throws Exception {
+    try (final VarCharVector vector = new VarCharVector("", allocator)) {
+      /*
+       * Allocate the default size, and then, reAlloc. This should double the 
allocation.
+       */
+      vector.allocateNewSafe(); // Initial allocation
+      vector.reAlloc(); // Double the allocation size.
+      int savedValueCapacity = vector.getValueCapacity();
+      int savedValueBufferSize = vector.valueBuffer.capacity();
+
+      /*
+       * Clear and allocate again.
+       */
+      vector.clear();
+      vector.allocateNewSafe();
+
+      /*
+       * Verify that the buffer sizes haven't changed.
+       */
+      Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+      Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
+    }
+  }
 }

Reply via email to