This is an automated email from the ASF dual-hosted git repository.
arina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 2c1a8a0 DRILL-7257: Set nullable var-width vector lastSet value
2c1a8a0 is described below
commit 2c1a8a0afa3a1d8853e7651f1b3d47d52969f50a
Author: Paul Rogers <[email protected]>
AuthorDate: Thu May 16 14:54:39 2019 -0700
DRILL-7257: Set nullable var-width vector lastSet value
Turns out this is due to a subtle issue with variable-width nullable
vectors. Such vectors have a lastSet attribute in the Mutator class.
When using "transfer pairs" to copy values, the code somehow decides
to zero-fill from the lastSet value to the record count. The row set
framework did not set this value, meaning that the RemovingRecordBatch
zero-filled the dir0 column when it chose to use transfer pairs rather
than copying values. The use of transfer pairs occurs when all rows in
a batch pass the filter prior to the removing record batch.
Modified the nullable vector writer to properly set the lastSet value at
the end of each batch. Added a unit test to verify the value is set
correctly.
Includes a bit of code clean-up.
---
.../physical/impl/svremover/AbstractSV2Copier.java | 19 +++++++---------
.../physical/impl/svremover/GenericSV2Copier.java | 8 +++----
.../apache/drill/exec/record/VectorAccessible.java | 2 +-
.../apache/drill/exec/record/VectorContainer.java | 5 ++---
.../exec/record/selection/SelectionVector2.java | 10 ++++-----
.../test/rowSet/test/TestScalarAccessors.java | 13 +++++++----
.../codegen/templates/NullableValueVectors.java | 25 ++++++++++++----------
.../apache/drill/exec/vector/NullableVector.java | 1 +
.../accessor/writer/NullableScalarWriter.java | 3 +++
9 files changed, 46 insertions(+), 40 deletions(-)
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/AbstractSV2Copier.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/AbstractSV2Copier.java
index d273fd3..ed33703 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/AbstractSV2Copier.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/AbstractSV2Copier.java
@@ -17,6 +17,9 @@
*/
package org.apache.drill.exec.physical.impl.svremover;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;
@@ -24,9 +27,6 @@ import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.vector.ValueVector;
-import java.util.ArrayList;
-import java.util.List;
-
public abstract class AbstractSV2Copier extends AbstractCopier {
protected ValueVector[] vvIn;
private SelectionVector2 sv2;
@@ -35,21 +35,18 @@ public abstract class AbstractSV2Copier extends
AbstractCopier {
@Override
public void setup(VectorAccessible incoming, VectorContainer outgoing) {
super.setup(incoming, outgoing);
- this.sv2 = incoming.getSelectionVector2();
+ sv2 = incoming.getSelectionVector2();
final int count = outgoing.getNumberOfColumns();
vvIn = new ValueVector[count];
- {
- int index = 0;
-
- for (VectorWrapper vectorWrapper: incoming) {
- vvIn[index] = vectorWrapper.getValueVector();
- index++;
- }
+ int index = 0;
+ for (VectorWrapper<?> vectorWrapper: incoming) {
+ vvIn[index++] = vectorWrapper.getValueVector();
}
}
+ @Override
public void copyEntryIndirect(int inIndex, int outIndex) {
copyEntry(sv2.getIndex(inIndex), outIndex);
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/GenericSV2Copier.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/GenericSV2Copier.java
index f607e8c..d2f8dac 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/GenericSV2Copier.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/GenericSV2Copier.java
@@ -18,7 +18,6 @@
package org.apache.drill.exec.physical.impl.svremover;
import org.apache.drill.exec.record.RecordBatch;
-import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.SchemaChangeCallBack;
@@ -28,15 +27,14 @@ public class GenericSV2Copier extends AbstractSV2Copier {
public GenericSV2Copier(RecordBatch incomingBatch, VectorContainer
outputContainer,
SchemaChangeCallBack callBack) {
for(VectorWrapper<?> vv : incomingBatch){
- TransferPair pair =
vv.getValueVector().makeTransferPair(outputContainer.addOrGet(vv.getField(),
callBack));
- transferPairs.add(pair);
+
transferPairs.add(vv.getValueVector().makeTransferPair(outputContainer.addOrGet(vv.getField(),
callBack)));
}
}
@Override
public void copyEntry(int inIndex, int outIndex) {
- for ( int i = 0; i < vvIn.length; i++ ) {
+ for (int i = 0; i < vvIn.length; i++) {
vvOut[i].copyEntry(outIndex, vvIn[i], inIndex);
}
}
-}
\ No newline at end of file
+}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorAccessible.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorAccessible.java
index 63dab62..f51f521 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorAccessible.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorAccessible.java
@@ -23,7 +23,7 @@ import
org.apache.drill.exec.record.selection.SelectionVector4;
// TODO javadoc
public interface VectorAccessible extends Iterable<VectorWrapper<?>> {
- // TODO are these <?> releated in any way? Should they be the same one?
+ // TODO are these <?> related in any way? Should they be the same one?
// TODO javadoc
VectorWrapper<?> getValueAccessorById(Class<?> clazz, int... fieldIds);
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
index 05d9510..4a90184 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/record/VectorContainer.java
@@ -32,7 +32,6 @@ import
org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.exec.vector.SchemaChangeCallBack;
import org.apache.drill.exec.vector.ValueVector;
-
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
import org.apache.drill.shaded.guava.com.google.common.collect.Sets;
@@ -376,8 +375,8 @@ public class VectorContainer implements VectorAccessible {
}
public void setRecordCount(int recordCount) {
- this.recordCount = recordCount;
- initialized = true;
+ this.recordCount = recordCount;
+ initialized = true;
}
/**
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
index 8afc5fb..859d492 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
+++
b/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
@@ -17,12 +17,12 @@
*/
package org.apache.drill.exec.record.selection;
-import io.netty.buffer.DrillBuf;
-
-import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.DeadBuf;
+import io.netty.buffer.DrillBuf;
+
/**
* A selection vector that fronts, at most, 64K values.
* The selection vector is used for two cases:
@@ -158,12 +158,12 @@ public class SelectionVector2 implements AutoCloseable {
}
public void setRecordCount(int recordCount){
-// logger.debug("Seting record count to {}", recordCount);
+// logger.debug("Setting record count to {}", recordCount);
this.recordCount = recordCount;
}
public boolean canDoFullTransfer() {
- return (recordCount == batchActualRecordCount);
+ return recordCount == batchActualRecordCount;
}
public void setBatchActualRecordCount(int actualRecordCount) {
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
index 56c6af2..582c2f4 100644
---
a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
+++
b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
@@ -17,12 +17,12 @@
*/
package org.apache.drill.test.rowSet.test;
+import static org.apache.drill.test.rowSet.RowSetUtilities.dec;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
-import static org.apache.drill.test.rowSet.RowSetUtilities.dec;
import java.math.BigDecimal;
import java.util.Arrays;
@@ -32,26 +32,28 @@ import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.SimpleVectorWrapper;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.DateUtilities;
+import org.apache.drill.exec.vector.NullableVarCharVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.accessor.ArrayReader;
import org.apache.drill.exec.vector.accessor.ScalarReader;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetBuilder;
import org.apache.drill.test.rowSet.RowSetReader;
import org.joda.time.DateTimeZone;
import org.joda.time.Instant;
import org.joda.time.LocalDate;
import org.joda.time.LocalTime;
import org.joda.time.Period;
-import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetBuilder;
import org.junit.Test;
import org.junit.experimental.categories.Category;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
/**
* Verify that simple scalar (non-repeated) column readers
@@ -678,6 +680,9 @@ public class TestScalarAccessors extends SubOperatorTest {
.addRow("abcd")
.build();
assertEquals(3, rs.rowCount());
+ SimpleVectorWrapper<?> vw = (SimpleVectorWrapper<?>)
rs.container().getValueVector(0);
+ NullableVarCharVector v = (NullableVarCharVector) vw.getValueVector();
+ assertEquals(3, v.getMutator().getLastSet());
RowSetReader reader = rs.reader();
ScalarReader colReader = reader.scalar(0);
diff --git a/exec/vector/src/main/codegen/templates/NullableValueVectors.java
b/exec/vector/src/main/codegen/templates/NullableValueVectors.java
index f82c718..60b3ec2 100644
--- a/exec/vector/src/main/codegen/templates/NullableValueVectors.java
+++ b/exec/vector/src/main/codegen/templates/NullableValueVectors.java
@@ -15,17 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.util.DecimalUtility;
-import org.apache.drill.exec.vector.BaseDataValueVector;
-import org.apache.drill.exec.vector.NullableVectorDefinitionSetter;
-
-import java.lang.Override;
-import java.lang.UnsupportedOperationException;
-import java.util.Set;
-
<@pp.dropOutputFile />
<#list vv.types as type>
<#list type.minor as minor>
@@ -41,6 +30,7 @@ import java.util.Set;
package org.apache.drill.exec.vector;
<#include "/@includes/vv_imports.ftl" />
+import
org.apache.drill.shaded.guava.com.google.common.annotations.VisibleForTesting;
/**
* Nullable${minor.class} implements a vector of values which could be null.
Elements in the vector
@@ -457,6 +447,13 @@ public final class ${className} extends
BaseDataValueVector implements <#if type
mutator.exchange(other.getMutator());
}
+ @Override
+ public void finalizeLastSet(int count) {
+ <#if type.major = "VarLen">
+ mutator.lastSet = count;
+ </#if>
+ }
+
<#if type.major != "VarLen">
@Override
public void toNullable(ValueVector nullableVector) {
@@ -756,6 +753,7 @@ public final class ${className} extends BaseDataValueVector
implements <#if type
values.getMutator().setValueCount(valueCount);
bits.getMutator().setValueCount(valueCount);
}
+
<#if type.major == "VarLen">
/** Enables this wrapper container class to participate in bulk mutator
logic */
private final class VarLenBulkInputCallbackImpl implements
VarLenBulkInput.BulkInputCallback<VarLenBulkEntry> {
@@ -849,6 +847,11 @@ public final class ${className} extends
BaseDataValueVector implements <#if type
<#if type.major = "VarLen">lastSet = -1;</#if>
}
+ <#if type.major = "VarLen">
+ @VisibleForTesting
+ public int getLastSet() { return lastSet; }
+
+ </#if>
// For nullable vectors, exchanging buffers (done elsewhere)
// requires also exchanging mutator state (done here.)
diff --git
a/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java
b/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java
index 80b732a..eed998a 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/NullableVector.java
@@ -27,4 +27,5 @@ public interface NullableVector extends ValueVector {
ValueVector getBitsVector();
ValueVector getValuesVector();
+ void finalizeLastSet(int count);
}
diff --git
a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
index b3c2ea5..be3a3e4 100644
---
a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
+++
b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
@@ -70,12 +70,14 @@ public class NullableScalarWriter extends
AbstractScalarWriterImpl {
}
}
+ private final NullableVector nullableVector;
private final UInt1ColumnWriter isSetWriter;
private final BaseScalarWriter baseWriter;
private ColumnWriterIndex writerIndex;
public NullableScalarWriter(ColumnMetadata schema, NullableVector
nullableVector, BaseScalarWriter baseWriter) {
this.schema = schema;
+ this.nullableVector = nullableVector;
isSetWriter = new UInt1ColumnWriter(nullableVector.getBitsVector());
this.baseWriter = baseWriter;
}
@@ -273,6 +275,7 @@ public class NullableScalarWriter extends
AbstractScalarWriterImpl {
// Avoid back-filling null values.
baseWriter.skipNulls();
baseWriter.endWrite();
+ nullableVector.finalizeLastSet(writerIndex.vectorIndex());
}
@Override