Repository: drill Updated Branches: refs/heads/master 4a718a0bd -> 7a2fc87ee
DRILL-5864: Selecting a non-existing field from a MapR-DB JSON table fails with NPE. Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/125a9271 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/125a9271 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/125a9271 Branch: refs/heads/master Commit: 125a9271d7cf0dfb30aac8e62447507ea0a7d6c9 Parents: 4a718a0 Author: Hanumath Rao Maduri <hmad...@maprtech.com> Authored: Wed Oct 11 17:07:22 2017 -0700 Committer: Aman Sinha <asi...@maprtech.com> Committed: Sun Nov 5 08:20:41 2017 -0800 ---------------------------------------------------------------------- contrib/format-maprdb/README.md | 6 ++ .../store/mapr/db/MapRDBFormatPluginConfig.java | 11 +++ .../mapr/db/json/MaprDBJsonRecordReader.java | 7 ++ .../drill/maprdb/tests/json/TestSimpleJson.java | 10 +++ .../exec/vector/complex/fn/JsonReader.java | 66 +------------- .../exec/vector/complex/fn/JsonReaderUtils.java | 94 ++++++++++++++++++++ 6 files changed, 129 insertions(+), 65 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/README.md ---------------------------------------------------------------------- diff --git a/contrib/format-maprdb/README.md b/contrib/format-maprdb/README.md index ff19285..a94a7cb 100644 --- a/contrib/format-maprdb/README.md +++ b/contrib/format-maprdb/README.md @@ -1,2 +1,8 @@ drill-mapr-plugin ================= +By default all the tests in contrib/format-maprdb are disabled. +To enable and run these tests please use -Pmapr profile to +compile and execute the tests. + +Here is an example of the mvn command to use to run these tests. +mvn install -Dtests=cluster -Pmapr http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java ---------------------------------------------------------------------- diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java index 50a67b4..ad153fe 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java @@ -32,6 +32,12 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig { public boolean ignoreSchemaChange = false; public boolean readAllNumbersAsDouble = false; public boolean disableCountOptimization = false; + /* This flag is a switch to do special handling in case of + * no columns in the query exists in the maprdb table. This flag + * can get deprecated once it is observed that this special handling + * is not regressing performance of reading maprdb table. + */ + public boolean nonExistentFieldSupport = true; @Override public int hashCode() { @@ -40,6 +46,7 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig { result = 31 * result + (ignoreSchemaChange ? 1231 : 1237); result = 31 * result + (readAllNumbersAsDouble ? 1231 : 1237); result = 31 * result + (disableCountOptimization ? 1231 : 1237); + result = 31 * result + (nonExistentFieldSupport ? 1231 : 1237); return result; } @@ -56,6 +63,8 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig { return false; } else if (disableCountOptimization != other.disableCountOptimization) { return false; + } else if (nonExistentFieldSupport != other.nonExistentFieldSupport) { + return false; } return true; } @@ -76,6 +85,8 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig { return enablePushdown; } + public boolean isNonExistentFieldSupport() { return nonExistentFieldSupport; } + public boolean isIgnoreSchemaChange() { return ignoreSchemaChange; } http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java ---------------------------------------------------------------------- diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java index ca31767..113b3ad 100644 --- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java +++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java @@ -26,6 +26,7 @@ import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.Stack; +import java.util.Collections; import java.util.concurrent.TimeUnit; import org.apache.drill.common.exceptions.ExecutionSetupException; @@ -44,6 +45,7 @@ import org.apache.drill.exec.store.mapr.db.MapRDBSubScanSpec; import org.apache.drill.exec.util.Utilities; import org.apache.drill.exec.vector.BaseValueVector; import org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl; +import org.apache.drill.exec.vector.complex.fn.JsonReaderUtils; import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter; import org.ojai.DocumentReader; import org.ojai.DocumentReader.EventType; @@ -95,6 +97,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader { private final boolean allTextMode; private final boolean ignoreSchemaChange; private final boolean disableCountOptimization; + private final boolean nonExistentColumnsProjection; public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec, MapRDBFormatPluginConfig formatPluginConfig, @@ -119,6 +122,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader { allTextMode = formatPluginConfig.isAllTextMode(); ignoreSchemaChange = formatPluginConfig.isIgnoreSchemaChange(); disablePushdown = !formatPluginConfig.isEnablePushdown(); + nonExistentColumnsProjection = formatPluginConfig.isNonExistentFieldSupport(); } @Override @@ -230,6 +234,9 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader { } } + if (nonExistentColumnsProjection && recordCount > 0) { + JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), allTextMode, Collections.EMPTY_LIST); + } vectorWriter.setValueCount(recordCount); logger.debug("Took {} ms to get {} records", watch.elapsed(TimeUnit.MILLISECONDS), recordCount); return recordCount; http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java ---------------------------------------------------------------------- diff --git a/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java b/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java index 998aae6..26f54b8 100644 --- a/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java +++ b/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java @@ -58,6 +58,16 @@ public class TestSimpleJson extends BaseJsonTest { } @Test + public void testSelectNonExistentColumns() throws Exception { + setColumnWidths(new int[] {23}); + final String sql = "SELECT\n" + + " something\n" + + "FROM\n" + + " hbase.business business limit 5"; + runSQLAndVerifyCount(sql, 5); + } + + @Test public void testKVGen() throws Exception { setColumnWidths(new int[] {21, 10, 6}); final String sql = "select _id, t.parking[0].`key` K, t.parking[0].`value` V from" http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java index cfad551..4ffbb26 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java @@ -104,71 +104,7 @@ public class JsonReader extends BaseJsonProcessor { @SuppressWarnings("resource") @Override public void ensureAtLeastOneField(ComplexWriter writer) { - List<BaseWriter.MapWriter> writerList = Lists.newArrayList(); - List<PathSegment> fieldPathList = Lists.newArrayList(); - BitSet emptyStatus = new BitSet(columns.size()); - - // first pass: collect which fields are empty - for (int i = 0; i < columns.size(); i++) { - SchemaPath sp = columns.get(i); - PathSegment fieldPath = sp.getRootSegment(); - BaseWriter.MapWriter fieldWriter = writer.rootAsMap(); - while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) { - fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath()); - fieldPath = fieldPath.getChild(); - } - writerList.add(fieldWriter); - fieldPathList.add(fieldPath); - if (fieldWriter.isEmptyMap()) { - emptyStatus.set(i, true); - } - if (i == 0 && !allTextMode) { - // when allTextMode is false, there is not much benefit to producing all - // the empty - // fields; just produce 1 field. The reason is that the type of the - // fields is - // unknown, so if we produce multiple Integer fields by default, a - // subsequent batch - // that contains non-integer fields will error out in any case. Whereas, - // with - // allTextMode true, we are sure that all fields are going to be treated - // as varchar, - // so it makes sense to produce all the fields, and in fact is necessary - // in order to - // avoid schema change exceptions by downstream operators. - break; - } - - } - - // second pass: create default typed vectors corresponding to empty fields - // Note: this is not easily do-able in 1 pass because the same fieldWriter - // may be - // shared by multiple fields whereas we want to keep track of all fields - // independently, - // so we rely on the emptyStatus. - for (int j = 0; j < fieldPathList.size(); j++) { - BaseWriter.MapWriter fieldWriter = writerList.get(j); - PathSegment fieldPath = fieldPathList.get(j); - if (emptyStatus.get(j)) { - if (allTextMode) { - fieldWriter.varChar(fieldPath.getNameSegment().getPath()); - } else { - fieldWriter.integer(fieldPath.getNameSegment().getPath()); - } - } - } - - for (ListWriter field : emptyArrayWriters) { - // checks that array has not been initialized - if (field.getValueCapacity() == 0) { - if (allTextMode) { - field.varChar(); - } else { - field.integer(); - } - } - } + JsonReaderUtils.ensureAtLeastOneField(writer, columns, allTextMode, emptyArrayWriters); } public void setSource(int start, int end, DrillBuf buf) throws IOException { http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java new file mode 100644 index 0000000..775be02 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.vector.complex.fn; + +import com.google.common.collect.Lists; +import org.apache.drill.common.expression.PathSegment; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.exec.vector.complex.writer.BaseWriter; + +import java.util.BitSet; +import java.util.Collection; +import java.util.List; + +public class JsonReaderUtils { + + public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer, + Collection<SchemaPath> columns, + boolean allTextMode, + List<BaseWriter.ListWriter> emptyArrayWriters) { + + List<BaseWriter.MapWriter> writerList = Lists.newArrayList(); + List<PathSegment> fieldPathList = Lists.newArrayList(); + BitSet emptyStatus = new BitSet(columns.size()); + int i = 0; + + // first pass: collect which fields are empty + for (SchemaPath sp : columns) { + PathSegment fieldPath = sp.getRootSegment(); + BaseWriter.MapWriter fieldWriter = writer.rootAsMap(); + while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) { + fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath()); + fieldPath = fieldPath.getChild(); + } + writerList.add(fieldWriter); + fieldPathList.add(fieldPath); + if (fieldWriter.isEmptyMap()) { + emptyStatus.set(i, true); + } + if (i == 0 && !allTextMode) { + // when allTextMode is false, there is not much benefit to producing all + // the empty fields; just produce 1 field. The reason is that the type of the + // fields is unknown, so if we produce multiple Integer fields by default, a + // subsequent batch that contains non-integer fields will error out in any case. + // Whereas, with allTextMode true, we are sure that all fields are going to be + // treated as varchar, so it makes sense to produce all the fields, and in fact + // is necessary in order to avoid schema change exceptions by downstream operators. + break; + } + i++; + } + + // second pass: create default typed vectors corresponding to empty fields + // Note: this is not easily do-able in 1 pass because the same fieldWriter + // may be shared by multiple fields whereas we want to keep track of all fields + // independently, so we rely on the emptyStatus. + for (int j = 0; j < fieldPathList.size(); j++) { + BaseWriter.MapWriter fieldWriter = writerList.get(j); + PathSegment fieldPath = fieldPathList.get(j); + if (emptyStatus.get(j)) { + if (allTextMode) { + fieldWriter.varChar(fieldPath.getNameSegment().getPath()); + } else { + fieldWriter.integer(fieldPath.getNameSegment().getPath()); + } + } + } + + for (BaseWriter.ListWriter field : emptyArrayWriters) { + // checks that array has not been initialized + if (field.getValueCapacity() == 0) { + if (allTextMode) { + field.varChar(); + } else { + field.integer(); + } + } + } + } +} \ No newline at end of file