This is an automated email from the ASF dual-hosted git repository. volodymyr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit d27221fd7d060acc1e069db2597ca1b1727f0fa3 Author: Bohdan Kazydub <[email protected]> AuthorDate: Fri Dec 13 16:27:07 2019 +0200 DRILL-7473: Parquet reader failed to get field of repeated map closes #1933 --- .../apache/drill/exec/expr/EvaluationVisitor.java | 7 ------ .../physical/impl/validate/BatchValidator.java | 24 +++++++-------------- .../exec/planner/index/FunctionalIndexHelper.java | 3 +-- .../drill/exec/vector/complex/FieldIdUtil.java | 1 + .../exec/store/parquet/TestParquetComplex.java | 15 +++++++++++++ .../map/parquet/repeated_struct_with_dict.parquet | Bin 0 -> 646 bytes 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java index c3478f6..d8742ff 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/EvaluationVisitor.java @@ -595,13 +595,6 @@ public class EvaluationVisitor { if (complex || repeated) { - if (e.getFieldId().isDict(depth)) { - JVar dictReader = generator.declareClassField("dictReader", generator.getModel()._ref(FieldReader.class)); - eval.assign(dictReader, expr); - - return new HoldingContainer(e.getMajorType(), dictReader, null, null, false, true); - } - JVar complexReader = generator.declareClassField("reader", generator.getModel()._ref(FieldReader.class)); if (isNullReaderLikely) { diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java index e1ffd7a..36d9c8f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java @@ -35,11 +35,10 @@ import org.apache.drill.exec.vector.VarCharVector; import org.apache.drill.exec.vector.VarDecimalVector; import org.apache.drill.exec.vector.VariableWidthVector; import org.apache.drill.exec.vector.ZeroVector; +import org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector; import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector; -import org.apache.drill.exec.vector.complex.DictVector; import org.apache.drill.exec.vector.complex.MapVector; import org.apache.drill.exec.vector.complex.RepeatedListVector; -import org.apache.drill.exec.vector.complex.RepeatedMapVector; import org.apache.drill.exec.vector.complex.UnionVector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -293,14 +292,16 @@ public class BatchValidator { // structure to check. } else if (vector instanceof BaseRepeatedValueVector) { validateRepeatedVector(name, (BaseRepeatedValueVector) vector); - } else if (vector instanceof RepeatedMapVector) { - validateRepeatedMapVector(name, (RepeatedMapVector) vector); + } else if (vector instanceof AbstractRepeatedMapVector) { // RepeatedMapVector or DictVector + // In case of DictVector, keys and values vectors are not validated explicitly to avoid NPE + // when keys and values vectors are not set. This happens when output dict vector's keys and + // values are not constructed while copying values from input reader to dict writer and the + // input reader is an instance of NullReader for all rows which does not have schema. + validateRepeatedMapVector(name, (AbstractRepeatedMapVector) vector); } else if (vector instanceof MapVector) { validateMapVector(name, (MapVector) vector); } else if (vector instanceof RepeatedListVector) { validateRepeatedListVector(name, (RepeatedListVector) vector); - } else if (vector instanceof DictVector) { - validateDictVector(name, (DictVector) vector); } else if (vector instanceof UnionVector) { validateUnionVector(name, (UnionVector) vector); } else if (vector instanceof VarDecimalVector) { @@ -397,8 +398,7 @@ public class BatchValidator { } } - private void validateRepeatedMapVector(String name, - RepeatedMapVector vector) { + private void validateRepeatedMapVector(String name, AbstractRepeatedMapVector vector) { int valueCount = vector.getAccessor().getValueCount(); int elementCount = validateOffsetVector(name + "-offsets", vector.getOffsetVector(), valueCount, Integer.MAX_VALUE); @@ -407,14 +407,6 @@ public class BatchValidator { } } - private void validateDictVector(String name, DictVector vector) { - int valueCount = vector.getAccessor().getValueCount(); - int elementCount = validateOffsetVector(name + "-offsets", - vector.getOffsetVector(), valueCount, Integer.MAX_VALUE); - validateVector(elementCount, vector.getKeys()); - validateVector(elementCount, vector.getValues()); - } - private void validateRepeatedListVector(String name, RepeatedListVector vector) { int valueCount = vector.getAccessor().getValueCount(); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/FunctionalIndexHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/FunctionalIndexHelper.java index 3ff81b4..41de5dd 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/FunctionalIndexHelper.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/index/FunctionalIndexHelper.java @@ -163,8 +163,7 @@ public class FunctionalIndexHelper { SchemaPath path = SchemaPath.parseFromString(f.getName()); rowfields.add(new RelDataTypeFieldImpl( path.getRootSegmentPath(), rowfields.size(), - typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), - typeFactory.createSqlType(SqlTypeName.ANY)) + typeFactory.createSqlType(SqlTypeName.ANY) )); columns.add(path); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java index 09e2cfc..0b6e0ee 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/FieldIdUtil.java @@ -85,6 +85,7 @@ public class FieldIdUtil { } // skip the first array segment as there is no corresponding child vector. seg = seg.getChild(); + depth++; // multi-level numbered access to a repeated map is not possible so return if the next part is also an array // segment. diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java index 075f644..f5b9cd7 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java @@ -835,4 +835,19 @@ public class TestParquetComplex extends BaseTestQuery { .baselineValues(2, TestBuilder.mapOfObject("a", 1, "b", 2, "c", 3)) .go(); } + + @Test // DRILL-7473 + public void testDictInRepeatedMap() throws Exception { + String query = "select struct_array[1].d as d from cp.`store/parquet/complex/map/parquet/repeated_struct_with_dict.parquet`"; + testBuilder() + .sqlQuery(query) + .unOrdered() + .baselineColumns("d") + .baselineValuesForSingleColumn( + TestBuilder.mapOfObject(1, "a", 2, "b", 3, "c"), + TestBuilder.mapOfObject(), + TestBuilder.mapOfObject(1, "a", 2, "b") + ) + .go(); + } } diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/map/parquet/repeated_struct_with_dict.parquet b/exec/java-exec/src/test/resources/store/parquet/complex/map/parquet/repeated_struct_with_dict.parquet new file mode 100644 index 0000000..8e1a8b6 Binary files /dev/null and b/exec/java-exec/src/test/resources/store/parquet/complex/map/parquet/repeated_struct_with_dict.parquet differ
