This is an automated email from the ASF dual-hosted git repository. ravindra pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 2e4220e ARROW-4635: [Java] allocateNew to use last capacity 2e4220e is described below commit 2e4220ec19f09f2e6ebb14dc98887802ab31dd75 Author: Pindikura Ravindra <ravin...@dremio.com> AuthorDate: Thu Feb 21 12:28:02 2019 +0530 ARROW-4635: [Java] allocateNew to use last capacity Modified allocateNew to allocate buffers as per the last capacity. This fixes a regression that was causing oversized allocations with StructVectors. Author: Pindikura Ravindra <ravin...@dremio.com> Closes #3717 from pravindra/reallocs and squashes the following commits: dd34f591 <Pindikura Ravindra> ARROW-4635: allocateNew to use last capacity --- .../apache/arrow/vector/BaseFixedWidthVector.java | 18 ++++---- .../arrow/vector/BaseVariableWidthVector.java | 37 +++++++++-------- .../java/org/apache/arrow/vector/BitVector.java | 2 +- .../java/org/apache/arrow/vector/TestCopyFrom.java | 24 +++++------ .../org/apache/arrow/vector/TestStructVector.java | 31 ++++++++++++++ .../org/apache/arrow/vector/TestVectorReAlloc.java | 48 ++++++++++++++++++++++ 6 files changed, 123 insertions(+), 37 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index f3c2837..fd1055d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -42,7 +42,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector implements FixedWidthVector, FieldVector, VectorDefinitionSetter { private final int typeWidth; - protected int initialValueAllocation; + protected int lastValueCapacity; protected final Field field; private int allocationMonitor; @@ -59,7 +59,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector allocationMonitor = 0; validityBuffer = allocator.getEmpty(); valueBuffer = allocator.getEmpty(); - initialValueAllocation = INITIAL_VALUE_ALLOCATION; + lastValueCapacity = INITIAL_VALUE_ALLOCATION; } @@ -151,7 +151,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector @Override public void setInitialCapacity(int valueCount) { computeAndCheckBufferSize(valueCount); - initialValueAllocation = valueCount; + lastValueCapacity = valueCount; } /** @@ -254,13 +254,13 @@ public abstract class BaseFixedWidthVector extends BaseValueVector */ @Override public boolean allocateNewSafe() { - computeAndCheckBufferSize(initialValueAllocation); + computeAndCheckBufferSize(lastValueCapacity); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(initialValueAllocation); + allocateBytes(lastValueCapacity); } catch (Exception e) { clear(); return false; @@ -317,6 +317,8 @@ public abstract class BaseFixedWidthVector extends BaseValueVector valueBuffer = buffers.getDataBuf(); validityBuffer = buffers.getValidityBuf(); zeroVector(); + + lastValueCapacity = getValueCapacity(); } /** @@ -405,8 +407,8 @@ public abstract class BaseFixedWidthVector extends BaseValueVector public void reAlloc() { int targetValueCount = getValueCapacity() * 2; if (targetValueCount == 0) { - if (initialValueAllocation > 0) { - targetValueCount = initialValueAllocation * 2; + if (lastValueCapacity > 0) { + targetValueCount = lastValueCapacity * 2; } else { targetValueCount = INITIAL_VALUE_ALLOCATION * 2; } @@ -425,6 +427,8 @@ public abstract class BaseFixedWidthVector extends BaseValueVector newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); validityBuffer.release(); validityBuffer = newValidityBuffer; + + lastValueCapacity = getValueCapacity(); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index a5b550a..9d765a4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -38,8 +38,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthVector, FieldVector, VectorDefinitionSetter { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; - private int initialValueAllocation; - private int initialValueAllocationSizeInBytes; + private int lastValueCapacity; + private int lastValueAllocationSizeInBytes; /* protected members */ public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ @@ -50,21 +50,19 @@ public abstract class BaseVariableWidthVector extends BaseValueVector protected int valueCount; protected int lastSet; protected final Field field; - private boolean cleared; public BaseVariableWidthVector(final String name, final BufferAllocator allocator, FieldType fieldType) { super(name, allocator); - initialValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; + lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; // -1 because we require one extra slot for the offset array. - initialValueAllocation = INITIAL_VALUE_ALLOCATION - 1; + lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1; field = new Field(name, fieldType, null); valueCount = 0; lastSet = -1; offsetBuffer = allocator.getEmpty(); validityBuffer = allocator.getEmpty(); valueBuffer = allocator.getEmpty(); - cleared = false; } /* TODO: @@ -155,8 +153,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; checkDataBufferSize(size); computeAndCheckOffsetsBufferSize(valueCount); - initialValueAllocationSizeInBytes = (int) size; - initialValueAllocation = valueCount; + lastValueAllocationSizeInBytes = (int) size; + lastValueCapacity = valueCount; } /** @@ -170,8 +168,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector long size = Math.max((long)(valueCount * density), 1L); checkDataBufferSize(size); computeAndCheckOffsetsBufferSize(valueCount); - initialValueAllocationSizeInBytes = (int) size; - initialValueAllocation = valueCount; + lastValueAllocationSizeInBytes = (int) size; + lastValueCapacity = valueCount; } /** @@ -251,7 +249,6 @@ public abstract class BaseVariableWidthVector extends BaseValueVector validityBuffer = releaseBuffer(validityBuffer); valueBuffer = releaseBuffer(valueBuffer); offsetBuffer = releaseBuffer(offsetBuffer); - cleared = true; lastSet = -1; valueCount = 0; } @@ -362,14 +359,14 @@ public abstract class BaseVariableWidthVector extends BaseValueVector */ @Override public boolean allocateNewSafe() { - checkDataBufferSize(initialValueAllocationSizeInBytes); - computeAndCheckOffsetsBufferSize(initialValueAllocation); + checkDataBufferSize(lastValueAllocationSizeInBytes); + computeAndCheckOffsetsBufferSize(lastValueCapacity); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(initialValueAllocationSizeInBytes, initialValueAllocation); + allocateBytes(lastValueAllocationSizeInBytes, lastValueCapacity); } catch (Exception e) { clear(); return false; @@ -442,6 +439,9 @@ public abstract class BaseVariableWidthVector extends BaseValueVector validityBuffer = buffers.getValidityBuf(); initOffsetBuffer(); initValidityBuffer(); + + lastValueCapacity = getValueCapacity(); + lastValueAllocationSizeInBytes = valueBuffer.capacity(); } /* allocate offset buffer */ @@ -478,7 +478,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector * @throws OutOfMemoryException if the internal memory allocation fails */ public void reallocDataBuffer() { - long baseSize = initialValueAllocationSizeInBytes; + long baseSize = lastValueAllocationSizeInBytes; final int currentBufferCapacity = valueBuffer.capacity(); if (baseSize < (long) currentBufferCapacity) { @@ -495,6 +495,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); valueBuffer.release(); valueBuffer = newBuf; + lastValueAllocationSizeInBytes = valueBuffer.capacity(); } /** @@ -523,8 +524,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector public void reallocValidityAndOffsetBuffers() { int targetOffsetCount = (offsetBuffer.capacity() / OFFSET_WIDTH) * 2; if (targetOffsetCount == 0) { - if (initialValueAllocation > 0) { - targetOffsetCount = 2 * (initialValueAllocation + 1); + if (lastValueCapacity > 0) { + targetOffsetCount = 2 * (lastValueCapacity + 1); } else { targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); } @@ -543,6 +544,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); validityBuffer.release(); validityBuffer = newValidityBuffer; + + lastValueCapacity = getValueCapacity(); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index c6c9642..b806ec7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -94,7 +94,7 @@ public class BitVector extends BaseFixedWidthVector { if (size * 2 > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); } - initialValueAllocation = valueCount; + lastValueCapacity = valueCount; } /** diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java index b10db95..08932ef 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java @@ -221,7 +221,7 @@ public class TestCopyFrom { final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -283,7 +283,7 @@ public class TestCopyFrom { final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -424,7 +424,7 @@ public class TestCopyFrom { final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -486,7 +486,7 @@ public class TestCopyFrom { final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -550,7 +550,7 @@ public class TestCopyFrom { final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -618,7 +618,7 @@ public class TestCopyFrom { final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -690,7 +690,7 @@ public class TestCopyFrom { final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -753,7 +753,7 @@ public class TestCopyFrom { final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -816,7 +816,7 @@ public class TestCopyFrom { final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -879,7 +879,7 @@ public class TestCopyFrom { final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -946,7 +946,7 @@ public class TestCopyFrom { final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); @@ -1014,7 +1014,7 @@ public class TestCopyFrom { final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); + assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); assertEquals(0, vector1.getValueCount()); int initialCapacity = vector1.getValueCapacity(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 54a11a3..7706be4 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -69,4 +69,35 @@ public class TestStructVector { assertEquals(0, toChild.getValidityBuffer().capacity()); } } + + @Test + public void testAllocateAfterReAlloc() throws Exception { + Map<String, String> metadata = new HashMap<>(); + metadata.put("k1", "v1"); + FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); + try (StructVector vector = new StructVector("struct", allocator, type, null)) { + MinorType childtype = MinorType.INT; + vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class); + + /* + * Allocate the default size, and then, reAlloc. This should double the allocation. + */ + vector.allocateNewSafe(); // Initial allocation + vector.reAlloc(); // Double the allocation size of self, and all children. + int savedValidityBufferCapacity = vector.getValidityBuffer().capacity(); + int savedValueCapacity = vector.getValueCapacity(); + + /* + * Clear and allocate again. + */ + vector.clear(); + vector.allocateNewSafe(); + + /* + * Verify that the buffer sizes haven't changed. + */ + Assert.assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity); + Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 60747aa..8e0806d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -142,4 +142,52 @@ public class TestVectorReAlloc { assertNull(vector.getObject(513)); } } + + @Test + public void testFixedAllocateAfterReAlloc() throws Exception { + try (final IntVector vector = new IntVector("", allocator)) { + /* + * Allocate the default size, and then, reAlloc. This should double the allocation. + */ + vector.allocateNewSafe(); // Initial allocation + vector.reAlloc(); // Double the allocation size. + int savedValueCapacity = vector.getValueCapacity(); + + /* + * Clear and allocate again. + */ + vector.clear(); + vector.allocateNewSafe(); + + /* + * Verify that the buffer sizes haven't changed. + */ + Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + } + } + + @Test + public void testVariableAllocateAfterReAlloc() throws Exception { + try (final VarCharVector vector = new VarCharVector("", allocator)) { + /* + * Allocate the default size, and then, reAlloc. This should double the allocation. + */ + vector.allocateNewSafe(); // Initial allocation + vector.reAlloc(); // Double the allocation size. + int savedValueCapacity = vector.getValueCapacity(); + int savedValueBufferSize = vector.valueBuffer.capacity(); + + /* + * Clear and allocate again. + */ + vector.clear(); + vector.allocateNewSafe(); + + /* + * Verify that the buffer sizes haven't changed. + */ + Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + } + } }