DRILL-6231: Fix memory allocation for repeated list vector (closes #1171)
Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d55b62f9 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d55b62f9 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d55b62f9 Branch: refs/heads/master Commit: d55b62f9ea76372c823c15780b2a69f81f0b945c Parents: 3167771 Author: Padma Penumarthy <[email protected]> Authored: Thu Mar 15 21:50:54 2018 -0700 Committer: Vitalii Diravka <[email protected]> Committed: Sat Mar 24 20:35:32 2018 +0200 ---------------------------------------------------------------------- .../drill/exec/record/RecordBatchSizer.java | 15 ++ .../exec/physical/unit/TestOutputBatchSize.java | 257 ++++++++++++++++++- 2 files changed, 267 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/d55b62f9/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java index 9525c91..bfe0ef1 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java @@ -395,11 +395,26 @@ public class RecordBatchSizer { } } + private void allocateRepeatedList(RepeatedListVector vector, int recordCount) { + vector.allocateOffsetsNew(recordCount); + recordCount *= getCardinality(); + ColumnSize child = children.get(vector.getField().getName()); + if (vector.getDataVector() != null) { + child.allocateVector(vector.getDataVector(), recordCount); + } + } + public void allocateVector(ValueVector vector, int recordCount) { if (vector instanceof AbstractMapVector) { allocateMap((AbstractMapVector) vector, recordCount); return; } + + 
if (vector instanceof RepeatedListVector) { + allocateRepeatedList((RepeatedListVector) vector, recordCount); + return; + } + AllocationHelper.allocate(vector, recordCount, getEntryWidth(), getCardinality()); } http://git-wip-us.apache.org/repos/asf/drill/blob/d55b62f9/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java index edd4cbf..76de381 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/unit/TestOutputBatchSize.java @@ -1194,20 +1194,267 @@ public class TestOutputBatchSize extends PhysicalOpUnitTestBase { // Allocates to nearest power of two colSize.allocateVector(v, testRowCount); + + // offset vector of delegate vector i.e. outer array should have row count number of values. UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector(); - assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity()); - ValueVector dataVector = ((RepeatedValueVector) v).getDataVector(); - assertEquals(Integer.highestOneBit((testRowCount * 2) << 1) - 1, dataVector.getValueCapacity()); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + + // Get inner vector of delegate vector. + ValueVector vector = ((RepeatedValueVector) v).getDataVector(); + + // Data vector of inner vector should + // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values. 
+ ValueVector dataVector = ((RepeatedValueVector) vector).getDataVector(); + assertEquals(Integer.highestOneBit((testRowCount*8) << 1), dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * row count number of values. + offsetVector = ((RepeatedValueVector) vector).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount*2) << 1), offsetVector.getValueCapacity()); v.clear(); // Allocates the same as value passed since it is already power of two. // -1 is done for adjustment needed for offset vector. colSize.allocateVector(v, testRowCountPowerTwo - 1); + + // offset vector of delegate vector i.e. outer array should have row count number of values. offsetVector = ((RepeatedListVector) v).getOffsetVector(); assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); - dataVector = ((RepeatedValueVector) v).getDataVector(); - assertEquals(Integer.highestOneBit(testRowCountPowerTwo)-1, dataVector.getValueCapacity()); + + // Get inner vector of delegate vector. + vector = ((RepeatedValueVector) v).getDataVector(); + + // Data vector of inner vector should + // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values. + dataVector = ((RepeatedValueVector) vector).getDataVector(); + assertEquals(testRowCountPowerTwo * 8, dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * row count number of values. + offsetVector = ((RepeatedValueVector) vector).getOffsetVector(); + assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity()); + v.clear(); + + // MAX ROW COUNT + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1); + + // offset vector of delegate vector i.e. outer array should have row count number of values. 
+ offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + + // Get inner vector of delegate vector. + vector = ((RepeatedValueVector) v).getDataVector(); + + // Data vector of inner vector should + // have 2 (outer array cardinality) * 4 (inner array cardinality) * row count number of values. + dataVector = ((RepeatedValueVector) vector).getDataVector(); + assertEquals(ValueVector.MAX_ROW_COUNT*8, dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * row count number of values. + offsetVector = ((RepeatedValueVector) vector).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT*2, offsetVector.getValueCapacity()); + v.clear(); + + // MIN ROW COUNT + colSize.allocateVector(v, 0); + + // offset vector of delegate vector i.e. outer array should have 1 value. + offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity()); + + // Get inner vector of delegate vector. + vector = ((RepeatedValueVector) v).getDataVector(); + + // Data vector of inner vector should have 1 value + dataVector = ((RepeatedValueVector) vector).getDataVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * 1. 
+ offsetVector = ((RepeatedValueVector) vector).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT*2, offsetVector.getValueCapacity()); v.clear(); } } + + @Test + public void testSizerRepeatedRepeatedList() throws Exception { + List<String> inputJsonBatches = Lists.newArrayList(); + StringBuilder batchString = new StringBuilder(); + + StringBuilder newString = new StringBuilder(); + newString.append("[ [[1,2,3,4], [5,6,7,8]], [[1,2,3,4], [5,6,7,8]] ]"); + + numRows = 9; + batchString.append("["); + for (int i = 0; i < numRows; i++) { + batchString.append("{\"c\" : " + newString); + batchString.append("},"); + } + batchString.append("{\"c\" : " + newString); + batchString.append("}"); + + batchString.append("]"); + inputJsonBatches.add(batchString.toString()); + + // Create a dummy scanBatch to figure out the size. + RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), + fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext)); + + VectorAccessible va = new BatchIterator(scanBatch).iterator().next(); + RecordBatchSizer sizer = new RecordBatchSizer(va); + + assertEquals(1, sizer.columns().size()); + RecordBatchSizer.ColumnSize column = sizer.columns().get("c"); + assertNotNull(column); + + /** + * stdDataSize:8*10*10*10, stdNetSize:8*10*10*10 + 8*10*10 + 8*10 + 4, + * dataSizePerEntry:16*8, netSizePerEntry:16*8 + 16*4 + 4*2 + 4*2, + * totalDataSize:16*8*10, totalNetSize:netSizePerEntry*10, valueCount:10, + * elementCount:10, estElementCountPerArray:1, isVariableWidth:false + */ + assertEquals(8000, column.getStdDataSizePerEntry()); + assertEquals(8884, column.getStdNetSizePerEntry()); + assertEquals(128, column.getDataSizePerEntry()); + assertEquals(156, column.getNetSizePerEntry()); + assertEquals(1280, column.getTotalDataSize()); + assertEquals(1560, column.getTotalNetSize()); + assertEquals(10, column.getValueCount()); + assertEquals(20, column.getElementCount()); + assertEquals(2, column.getCardinality(), 0.01); + 
assertEquals(false, column.isVariableWidth()); + + final int testRowCount = 1000; + final int testRowCountPowerTwo = 2048; + + for (VectorWrapper<?> vw : va) { + ValueVector v = vw.getValueVector(); + v.clear(); + + RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName()); + + // Allocates to nearest power of two + colSize.allocateVector(v, testRowCount); + + // offset vector of delegate vector i.e. outer array should have row count number of values. + UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity()); + + // Get data vector of delegate vector. This is repeated list again + ValueVector dataVector = ((RepeatedListVector) v).getDataVector(); + + // offset vector of delegate vector of the inner repeated list + // This should have row count * 2 number of values. + offsetVector = ((RepeatedListVector) dataVector).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount*2) << 1), offsetVector.getValueCapacity()); + + // Data vector of inner vector should have row count * 2 number of values - 1 (for offset vector adjustment). + ValueVector innerDataVector = ((RepeatedValueVector) dataVector).getDataVector(); + assertEquals((Integer.highestOneBit((testRowCount*2) << 1) - 1), dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values. + offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector(); + assertEquals((Integer.highestOneBit(testRowCount*4) << 1), offsetVector.getValueCapacity()); + + // Data vector of inner vector should + // have 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values. 
+ dataVector = ((RepeatedValueVector) innerDataVector).getDataVector(); + assertEquals(Integer.highestOneBit(testRowCount << 1) * 16, dataVector.getValueCapacity()); + + v.clear(); + + // Allocates the same as value passed since it is already power of two. + // -1 is done for adjustment needed for offset vector. + colSize.allocateVector(v, testRowCountPowerTwo - 1); + + // offset vector of delegate vector i.e. outer array should have row count number of values. + offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity()); + + // Get data vector of delegate vector. This is repeated list again + dataVector = ((RepeatedListVector) v).getDataVector(); + + // offset vector of delegate vector of the inner repeated list + // This should have row count * 2 number of values. + offsetVector = ((RepeatedListVector) dataVector).getOffsetVector(); + assertEquals(testRowCountPowerTwo*2, offsetVector.getValueCapacity()); + + // Data vector of inner vector should have row count * 2 number of values - 1 (for offset vector adjustment). + innerDataVector = ((RepeatedValueVector) dataVector).getDataVector(); + assertEquals(testRowCountPowerTwo*2 - 1, dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values. + offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector(); + assertEquals(testRowCountPowerTwo*4, offsetVector.getValueCapacity()); + + // Data vector of inner vector should + // have 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values. + dataVector = ((RepeatedValueVector) innerDataVector).getDataVector(); + assertEquals(testRowCountPowerTwo * 16, dataVector.getValueCapacity()); + + v.clear(); + + // MAX ROW COUNT + colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1); + + // offset vector of delegate vector i.e. 
outer array should have row count number of values. + offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity()); + + // Get data vector of delegate vector. This is repeated list again + dataVector = ((RepeatedListVector) v).getDataVector(); + + // offset vector of delegate vector of the inner repeated list + // This should have row count * 2 number of values. + offsetVector = ((RepeatedListVector) dataVector).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT*2, offsetVector.getValueCapacity()); + + // Data vector of inner vector should have row count * 2 number of values - 1 (for offset vector adjustment). + innerDataVector = ((RepeatedValueVector) dataVector).getDataVector(); + assertEquals(ValueVector.MAX_ROW_COUNT*2 - 1, dataVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values. + offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector(); + assertEquals(ValueVector.MAX_ROW_COUNT*4, offsetVector.getValueCapacity()); + + // Data vector of inner vector should + // have 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values. + dataVector = ((RepeatedValueVector) innerDataVector).getDataVector(); + assertEquals(ValueVector.MAX_ROW_COUNT*16, dataVector.getValueCapacity()); + + v.clear(); + + // MIN ROW COUNT + colSize.allocateVector(v, 0); + + // offset vector of delegate vector i.e. outer array should have 1 value. + offsetVector = ((RepeatedListVector) v).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity()); + + // Get data vector of delegate vector. 
This is repeated list again + dataVector = ((RepeatedListVector) v).getDataVector(); + + // offset vector of delegate vector of the inner repeated list + offsetVector = ((RepeatedListVector) dataVector).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity()); + + // offset vector of inner vector should have + // 2 (outer array cardinality) * 1. + offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector(); + assertEquals(ValueVector.MIN_ROW_COUNT*2, offsetVector.getValueCapacity()); + + // Data vector of inner vector should have 1 value. + dataVector = ((RepeatedValueVector) innerDataVector).getDataVector(); + assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity()); + + v.clear(); + + } + } + } \ No newline at end of file
