DRILL-6234: Improved documentation for VariableWidthVector mutators, and added simple unit tests demonstrating mutator behavior.
close apache/drill#1164 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9a6cb59b Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9a6cb59b Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9a6cb59b Branch: refs/heads/master Commit: 9a6cb59b9b7a5b127e5f60309ce2f506ede9652a Parents: f5b8223 Author: Timothy Farkas <[email protected]> Authored: Tue Mar 13 17:24:28 2018 -0700 Committer: Aman Sinha <[email protected]> Committed: Fri Mar 30 22:47:31 2018 -0700 ---------------------------------------------------------------------- exec/vector/pom.xml | 7 +- .../templates/VariableLengthVectors.java | 61 ++++++++ .../apache/drill/exec/vector/ValueVector.java | 3 +- .../exec/vector/VariableLengthVectorTest.java | 141 +++++++++++++++++++ 4 files changed, 210 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/pom.xml ---------------------------------------------------------------------- diff --git a/exec/vector/pom.xml b/exec/vector/pom.xml index 0184305..21e138d 100644 --- a/exec/vector/pom.xml +++ b/exec/vector/pom.xml @@ -65,7 +65,12 @@ <version>0.7.1</version> </dependency> - + <dependency> + <groupId>org.apache.drill</groupId> + <artifactId>drill-common</artifactId> + <version>${project.version}</version> + <classifier>tests</classifier> + </dependency> </dependencies> http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java ---------------------------------------------------------------------- diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java index 516eb52..ab995cd 100644 --- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -512,6 +512,8 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V } /** + * <h4>Overview</h4> + * <p> * Mutable${minor.class} implements a vector of variable width values. Elements in the vector * are accessed by position from the logical start of the vector. A fixed width offsetVector * is used to convert an element's position to it's offset from the start of the (0-based) @@ -520,6 +522,46 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V * The equivalent Java primitive is '${minor.javaType!type.javaType}' * * NB: this class is automatically generated from ValueVectorTypes.tdd using FreeMarker. + * </p> + * <h4>Contract</h4> + * <p> + * <ol> + * <li> + * <b>Supported Writes:</b> {@link VariableWidthVector}s do not support random writes. In contrast {@link org.apache.drill.exec.vector.FixedWidthVector}s do + * allow random writes but special care is needed. + * </li> + * <li> + * <b>Writing Values:</b> All set methods must be called with a consecutive sequence of indices. With a few exceptions: + * <ol> + * <li>You can update the last index you just set.</li> + * <li>You can reset a previous index (call it Idx), but you must assume all the data after Idx is corrupt. Also + * note that the memory consumed by data that came after Idx is not released.</li> + * </ol> + * </li> + * <li> + * <b>Setting Value Count:</b> Vectors aren't explicitly aware of how many values they contain. So you must keep track of the + * number of values you've written to the vector and once you are done writing to the vector you must call {@link Mutator#setValueCount(int)}. + * It is possible to trim the vector by setting the value count to be less than the number of values currently contained in the vector. Note the extra memory consumed in + * the data buffer is not freed when this is done. + * </li> + * <li> + * <b>Memory Allocation:</b> When setting a value at an index you must do one of the following to ensure you do not get an {@link IndexOutOfBoundsException}. + * <ol> + * <li> + * Allocate the exact amount of memory you need when using the {@link Mutator#set(int, byte[])} methods. If you do not + * manually allocate sufficient memory an {@link IndexOutOfBoundsException} can be thrown when the data buffer runs out of space. + * </li> + * <li> + * Or you can use the {@link Mutator#setSafe(int, byte[])} methods, which will automatically grow your data buffer to + * fit your data. + * </li> + * </ol> + * </li> + * <li> + * <b>Immutability:</b> Once a vector has been populated with data and {@link #setValueCount(int)} has been called, it should be considered immutable. + * </li> + * </ol> + * </p> */ public final class Mutator extends BaseValueVector.BaseMutator implements VariableWidthVector.VariableWidthMutator { @@ -703,6 +745,25 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V data.setBytes(currentOffset, holder.buffer, holder.start, length); } + /** + * <h4>Notes on Usage</h4> + * <p> + * For {@link VariableWidthVector}s this method can be used in the following cases: + * <ul> + * <li>Setting the actual number of elements currently contained in the vector.</li> + * <li>Trimming the vector to have fewer elements than it current does.</li> + * </ul> + * </p> + * <h4>Caveats</h4> + * <p> + * It is important to note that for {@link org.apache.drill.exec.vector.FixedWidthVector}s this method can also be used to expand the vector. + * However, {@link VariableWidthVector} do not support this usage and this method will throw an {@link IndexOutOfBoundsException} if you attempt + * to use it in this way. Expansion of valueCounts is not supported mainly because there is no benefit, since you would still have to rely on the setSafe + * methods to appropriatly expand the data buffer and populate the vector anyway (since by definition we do not know the width of elements). See DRILL-6234 for details. + * </p> + * <h4>Method Documentation</h4> + * {@inheritDoc} + */ @Override public void setValueCount(int valueCount) { final int currentByteCapacity = getByteCapacity(); http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java ---------------------------------------------------------------------- diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java index f873cc6..2659810 100644 --- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java +++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java @@ -293,7 +293,8 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> { */ interface Mutator { /** - * Sets the number of values that is stored in this vector to the given value count. + * Sets the number of values that is stored in this vector to the given value count. <b>WARNING!</b> Once the + * valueCount is set, the vector should be considered immutable. * * @param valueCount value count to set. */ http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java ---------------------------------------------------------------------- diff --git a/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java new file mode 100644 index 0000000..eaee597 --- /dev/null +++ b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector; + +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.Types; +import org.apache.drill.exec.memory.RootAllocator; +import org.apache.drill.exec.record.MaterializedField; +import org.junit.Assert; +import org.junit.Test; + +/** + * This test uses {@link VarCharVector} to test the template code in VariableLengthVector. + */ +public class VariableLengthVectorTest +{ + /** + * If the vector contains 1000 records, setting a value count of 1000 should work. + */ + @Test + public void testSettingSameValueCount() + { + try (RootAllocator allocator = new RootAllocator(10_000_000)) { + final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR)); + final VarCharVector vector = new VarCharVector(field, allocator); + + vector.allocateNew(); + + try { + final int size = 1000; + final VarCharVector.Mutator mutator = vector.getMutator(); + final VarCharVector.Accessor accessor = vector.getAccessor(); + + setSafeIndexStrings("", 0, size, mutator); + + mutator.setValueCount(size); + Assert.assertEquals(size, accessor.getValueCount()); + checkIndexStrings("", 0, size, accessor); + } finally { + vector.clear(); + } + } + } + + /** + * Test truncating data. If you have 10000 records, reduce the vector to 1000 records. + */ + @Test + public void testTrunicateVectorSetValueCount() + { + try (RootAllocator allocator = new RootAllocator(10_000_000)) { + final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR)); + final VarCharVector vector = new VarCharVector(field, allocator); + + vector.allocateNew(); + + try { + final int size = 1000; + final int fluffSize = 10000; + final VarCharVector.Mutator mutator = vector.getMutator(); + final VarCharVector.Accessor accessor = vector.getAccessor(); + + setSafeIndexStrings("", 0, size, mutator); + setSafeIndexStrings("first cut ", size, fluffSize, mutator); + + mutator.setValueCount(fluffSize); + Assert.assertEquals(fluffSize, accessor.getValueCount()); + + checkIndexStrings("", 0, size, accessor); + + } finally { + vector.clear(); + } + } + } + + /** + * Set 10000 values. Then go back and set new values starting at the 1001 the record. + */ + @Test + public void testSetBackTracking() + { + try (RootAllocator allocator = new RootAllocator(10_000_000)) { + final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR)); + final VarCharVector vector = new VarCharVector(field, allocator); + + vector.allocateNew(); + + try { + final int size = 1000; + final int fluffSize = 10000; + final VarCharVector.Mutator mutator = vector.getMutator(); + final VarCharVector.Accessor accessor = vector.getAccessor(); + + setSafeIndexStrings("", 0, size, mutator); + setSafeIndexStrings("first cut ", size, fluffSize, mutator); + setSafeIndexStrings("redone cut ", size, fluffSize, mutator); + + mutator.setValueCount(fluffSize); + Assert.assertEquals(fluffSize, accessor.getValueCount()); + + checkIndexStrings("", 0, size, accessor); + checkIndexStrings("redone cut ", size, fluffSize, accessor); + + } finally { + vector.clear(); + } + } + } + + public static void setSafeIndexStrings(String prefix, int offset, int size, VarCharVector.Mutator mutator) + { + for (int index = offset; index < size; index++) { + final String indexString = prefix + "String num " + index; + mutator.setSafe(index, indexString.getBytes()); + } + } + + public static void checkIndexStrings(String prefix, int offset, int size, VarCharVector.Accessor accessor) + { + for (int index = offset; index < size; index++) { + final String indexString = prefix + "String num " + index; + Assert.assertArrayEquals(indexString.getBytes(), accessor.get(index)); + } + } +}
