DRILL-6262: IndexOutOfBoundException in RecordBatchSize for empty variableWidthVector
closes #1175 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/051a96da Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/051a96da Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/051a96da Branch: refs/heads/master Commit: 051a96dae42c70e633ba3f6af9817836544e075b Parents: 8663e8a Author: Sorabh Hamirwasia <[email protected]> Authored: Fri Mar 16 16:57:12 2018 -0700 Committer: Vitalii Diravka <[email protected]> Committed: Mon Mar 26 13:02:56 2018 +0300 ---------------------------------------------------------------------- .../drill/exec/record/RecordBatchSizer.java | 9 ++-- .../drill/exec/record/TestRecordBatchSizer.java | 43 ++++++++++++++++++-- .../codegen/templates/RepeatedValueVectors.java | 2 +- .../templates/VariableLengthVectors.java | 8 +++- 4 files changed, 50 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/051a96da/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java index bfe0ef1..1586ccc 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java @@ -321,10 +321,8 @@ public class RecordBatchSizer { // Calculate pure data size. if (isVariableWidth) { - UInt4Vector offsetVector = ((RepeatedValueVector) v).getOffsetVector(); - int innerValueCount = offsetVector.getAccessor().get(valueCount); VariableWidthVector dataVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector()); - totalDataSize = dataVector.getOffsetVector().getAccessor().get(innerValueCount); + totalDataSize = dataVector.getCurrentSizeInBytes(); } else { ValueVector dataVector = ((RepeatedValueVector) v).getDataVector(); totalDataSize = dataVector.getPayloadByteCount(elementCount); @@ -343,7 +341,7 @@ public class RecordBatchSizer { // Calculate pure data size. if (isVariableWidth) { VariableWidthVector variableWidthVector = ((VariableWidthVector) ((NullableVector) v).getValuesVector()); - totalDataSize = variableWidthVector.getOffsetVector().getAccessor().get(valueCount); + totalDataSize = variableWidthVector.getCurrentSizeInBytes(); } else { // Another special case. if (v instanceof UntypedNullVector) { @@ -362,8 +360,7 @@ public class RecordBatchSizer { // Calculate pure data size. if (isVariableWidth) { - UInt4Vector offsetVector = ((VariableWidthVector)v).getOffsetVector(); - totalDataSize = offsetVector.getAccessor().get(valueCount); + totalDataSize = ((VariableWidthVector) v).getCurrentSizeInBytes(); } else { totalDataSize = v.getPayloadByteCount(valueCount); } http://git-wip-us.apache.org/repos/asf/drill/blob/051a96da/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java index f32dba3..ccb9c19 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestRecordBatchSizer.java @@ -17,9 +17,6 @@ */ package org.apache.drill.exec.record; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.record.RecordBatchSizer.ColumnSize; import org.apache.drill.exec.vector.NullableVector; @@ -36,6 +33,10 @@ import org.apache.drill.test.rowSet.RowSetBuilder; import org.apache.drill.test.rowSet.schema.SchemaBuilder; import org.junit.Test; +import static junit.framework.TestCase.fail; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + public class TestRecordBatchSizer extends SubOperatorTest { private final int testRowCount = 1000; private final int testRowCountPowerTwo = 2048; @@ -804,4 +805,38 @@ public class TestRecordBatchSizer extends SubOperatorTest { } -} \ No newline at end of file + /** + * Test to verify that record batch sizer handles the actual empty vectors correctly. RowSetBuilder by default + * allocates Drillbuf of 10bytes for each vector type which makes their capacity >0 and not ==0 which will be in + * case of empty vectors. + */ + @Test + public void testEmptyVariableWidthVector() { + final BatchSchema schema = new SchemaBuilder() + .add("key", MinorType.INT) + .add("value", MinorType.VARCHAR) + .build(); + + final RowSetBuilder builder = fixture.rowSetBuilder(schema); + final RowSet rows = builder.build(); + + // Release the initial bytes allocated by RowSetBuilder + VectorAccessibleUtilities.clear(rows.container()); + + try { + // Create RecordBatchSizer for this empty container and it should not fail + final RecordBatchSizer sizer = new RecordBatchSizer(rows.container()); + int totalDataSize = 0; + + for (ColumnSize colSize : sizer.columns().values()) { + totalDataSize += colSize.getTotalDataSize(); + } + // Verify that the totalDataSize for all the columns is zero + assertEquals(0, totalDataSize); + } catch (Exception ex) { + fail(); + } finally { + rows.clear(); + } + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/051a96da/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java ---------------------------------------------------------------------- diff --git a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java index c20ee89..418898b 100644 --- a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java +++ b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java @@ -228,7 +228,7 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector @Override protected SerializedField.Builder getMetadataBuilder() { return super.getMetadataBuilder() - .setVarByteLength(values.getVarByteLength()); + .setVarByteLength(values.getCurrentSizeInBytes()); } @Override http://git-wip-us.apache.org/repos/asf/drill/blob/051a96da/exec/vector/src/main/codegen/templates/VariableLengthVectors.java ---------------------------------------------------------------------- diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java index 3dec3f8..516eb52 100644 --- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -111,9 +111,15 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V return data.capacity(); } + /** + * Return the number of bytes contained in the current var len byte vector. + * TODO: Remove getVarByteLength with it's implementation after all client's are moved to using getCurrentSizeInBytes. + * It's kept as is to preserve backward compatibility + * @return + */ @Override public int getCurrentSizeInBytes() { - return offsetVector.getAccessor().get(getAccessor().getValueCount()); + return getVarByteLength(); } /**
