[1/2] drill git commit: DRILL-5864: Selecting a non-existing field from a MapR-DB JSON table fails with NPE.

amansinha Sun, 05 Nov 2017 11:45:37 -0800

Repository: drill
Updated Branches:
  refs/heads/master 4a718a0bd -> 7a2fc87ee



DRILL-5864: Selecting a non-existing field from a MapR-DB JSON table fails with 
NPE.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/125a9271
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/125a9271
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/125a9271

Branch: refs/heads/master
Commit: 125a9271d7cf0dfb30aac8e62447507ea0a7d6c9
Parents: 4a718a0
Author: Hanumath Rao Maduri <hmad...@maprtech.com>
Authored: Wed Oct 11 17:07:22 2017 -0700
Committer: Aman Sinha <asi...@maprtech.com>
Committed: Sun Nov 5 08:20:41 2017 -0800

----------------------------------------------------------------------
 contrib/format-maprdb/README.md                 |  6 ++
 .../store/mapr/db/MapRDBFormatPluginConfig.java | 11 +++
 .../mapr/db/json/MaprDBJsonRecordReader.java    |  7 ++
 .../drill/maprdb/tests/json/TestSimpleJson.java | 10 +++
 .../exec/vector/complex/fn/JsonReader.java      | 66 +-------------
 .../exec/vector/complex/fn/JsonReaderUtils.java | 94 ++++++++++++++++++++
 6 files changed, 129 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/README.md
----------------------------------------------------------------------
diff --git a/contrib/format-maprdb/README.md b/contrib/format-maprdb/README.md
index ff19285..a94a7cb 100644
--- a/contrib/format-maprdb/README.md
+++ b/contrib/format-maprdb/README.md
@@ -1,2 +1,8 @@
 drill-mapr-plugin
 =================
+By default all the tests in contrib/format-maprdb are disabled.
+To enable and run these tests please use -Pmapr profile to
+compile and execute the tests.
+
+Here is an example of the mvn command to use to run these tests.
+mvn install -Dtests=cluster -Pmapr

http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
----------------------------------------------------------------------
diff --git 
a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
 
b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
index 50a67b4..ad153fe 100644
--- 
a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
+++ 
b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
@@ -32,6 +32,12 @@ public class MapRDBFormatPluginConfig extends 
TableFormatPluginConfig {
   public boolean ignoreSchemaChange = false;
   public boolean readAllNumbersAsDouble = false;
   public boolean disableCountOptimization = false;
+  /* This flag is a switch to do special handling in case of
+   * no columns in the query exists in the maprdb table. This flag
+   * can get deprecated once it is observed that this special handling
+   * is not regressing performance of reading maprdb table.
+   */
+  public boolean nonExistentFieldSupport = true;
 
   @Override
   public int hashCode() {
@@ -40,6 +46,7 @@ public class MapRDBFormatPluginConfig extends 
TableFormatPluginConfig {
     result = 31 * result + (ignoreSchemaChange ? 1231 : 1237);
     result = 31 * result + (readAllNumbersAsDouble ? 1231 : 1237);
     result = 31 * result + (disableCountOptimization ? 1231 : 1237);
+    result = 31 * result + (nonExistentFieldSupport ? 1231 : 1237);
     return result;
   }
 
@@ -56,6 +63,8 @@ public class MapRDBFormatPluginConfig extends 
TableFormatPluginConfig {
       return false;
     } else if (disableCountOptimization != other.disableCountOptimization) {
       return false;
+    } else if (nonExistentFieldSupport != other.nonExistentFieldSupport) {
+      return false;
     }
     return true;
   }
@@ -76,6 +85,8 @@ public class MapRDBFormatPluginConfig extends 
TableFormatPluginConfig {
     return enablePushdown;
   }
 
+  public boolean isNonExistentFieldSupport() { return nonExistentFieldSupport; 
}
+
   public boolean isIgnoreSchemaChange() {
     return ignoreSchemaChange;
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
----------------------------------------------------------------------
diff --git 
a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
 
b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
index ca31767..113b3ad 100644
--- 
a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
+++ 
b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
@@ -26,6 +26,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.Stack;
+import java.util.Collections;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.drill.common.exceptions.ExecutionSetupException;
@@ -44,6 +45,7 @@ import org.apache.drill.exec.store.mapr.db.MapRDBSubScanSpec;
 import org.apache.drill.exec.util.Utilities;
 import org.apache.drill.exec.vector.BaseValueVector;
 import org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl;
+import org.apache.drill.exec.vector.complex.fn.JsonReaderUtils;
 import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
 import org.ojai.DocumentReader;
 import org.ojai.DocumentReader.EventType;
@@ -95,6 +97,7 @@ public class MaprDBJsonRecordReader extends 
AbstractRecordReader {
   private final boolean allTextMode;
   private final boolean ignoreSchemaChange;
   private final boolean disableCountOptimization;
+  private final boolean nonExistentColumnsProjection;
 
   public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec,
       MapRDBFormatPluginConfig formatPluginConfig,
@@ -119,6 +122,7 @@ public class MaprDBJsonRecordReader extends 
AbstractRecordReader {
     allTextMode = formatPluginConfig.isAllTextMode();
     ignoreSchemaChange = formatPluginConfig.isIgnoreSchemaChange();
     disablePushdown = !formatPluginConfig.isEnablePushdown();
+    nonExistentColumnsProjection = 
formatPluginConfig.isNonExistentFieldSupport();
   }
 
   @Override
@@ -230,6 +234,9 @@ public class MaprDBJsonRecordReader extends 
AbstractRecordReader {
       }
     }
 
+    if (nonExistentColumnsProjection && recordCount > 0) {
+      JsonReaderUtils.ensureAtLeastOneField(vectorWriter, getColumns(), 
allTextMode, Collections.EMPTY_LIST);
+    }
     vectorWriter.setValueCount(recordCount);
     logger.debug("Took {} ms to get {} records", 
watch.elapsed(TimeUnit.MILLISECONDS), recordCount);
     return recordCount;

http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java
----------------------------------------------------------------------
diff --git 
a/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java
 
b/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java
index 998aae6..26f54b8 100644
--- 
a/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java
+++ 
b/contrib/format-maprdb/src/test/java/com/mapr/drill/maprdb/tests/json/TestSimpleJson.java
@@ -58,6 +58,16 @@ public class TestSimpleJson extends BaseJsonTest {
   }
 
   @Test
+  public void testSelectNonExistentColumns() throws Exception {
+    setColumnWidths(new int[] {23});
+    final String sql = "SELECT\n"
+            + "  something\n"
+            + "FROM\n"
+            + "  hbase.business business limit 5";
+    runSQLAndVerifyCount(sql, 5);
+  }
+
+  @Test
   public void testKVGen() throws Exception {
     setColumnWidths(new int[] {21, 10, 6});
     final String sql = "select _id, t.parking[0].`key` K, t.parking[0].`value` 
V from"

http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
index cfad551..4ffbb26 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
@@ -104,71 +104,7 @@ public class JsonReader extends BaseJsonProcessor {
   @SuppressWarnings("resource")
   @Override
   public void ensureAtLeastOneField(ComplexWriter writer) {
-    List<BaseWriter.MapWriter> writerList = Lists.newArrayList();
-    List<PathSegment> fieldPathList = Lists.newArrayList();
-    BitSet emptyStatus = new BitSet(columns.size());
-
-    // first pass: collect which fields are empty
-    for (int i = 0; i < columns.size(); i++) {
-      SchemaPath sp = columns.get(i);
-      PathSegment fieldPath = sp.getRootSegment();
-      BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
-      while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
-        fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
-        fieldPath = fieldPath.getChild();
-      }
-      writerList.add(fieldWriter);
-      fieldPathList.add(fieldPath);
-      if (fieldWriter.isEmptyMap()) {
-        emptyStatus.set(i, true);
-      }
-      if (i == 0 && !allTextMode) {
-        // when allTextMode is false, there is not much benefit to producing 
all
-        // the empty
-        // fields; just produce 1 field. The reason is that the type of the
-        // fields is
-        // unknown, so if we produce multiple Integer fields by default, a
-        // subsequent batch
-        // that contains non-integer fields will error out in any case. 
Whereas,
-        // with
-        // allTextMode true, we are sure that all fields are going to be 
treated
-        // as varchar,
-        // so it makes sense to produce all the fields, and in fact is 
necessary
-        // in order to
-        // avoid schema change exceptions by downstream operators.
-        break;
-      }
-
-    }
-
-    // second pass: create default typed vectors corresponding to empty fields
-    // Note: this is not easily do-able in 1 pass because the same fieldWriter
-    // may be
-    // shared by multiple fields whereas we want to keep track of all fields
-    // independently,
-    // so we rely on the emptyStatus.
-    for (int j = 0; j < fieldPathList.size(); j++) {
-      BaseWriter.MapWriter fieldWriter = writerList.get(j);
-      PathSegment fieldPath = fieldPathList.get(j);
-      if (emptyStatus.get(j)) {
-        if (allTextMode) {
-          fieldWriter.varChar(fieldPath.getNameSegment().getPath());
-        } else {
-          fieldWriter.integer(fieldPath.getNameSegment().getPath());
-        }
-      }
-    }
-
-    for (ListWriter field : emptyArrayWriters) {
-      // checks that array has not been initialized
-      if (field.getValueCapacity() == 0) {
-        if (allTextMode) {
-          field.varChar();
-        } else {
-          field.integer();
-        }
-      }
-    }
+    JsonReaderUtils.ensureAtLeastOneField(writer, columns, allTextMode, 
emptyArrayWriters);
   }
 
   public void setSource(int start, int end, DrillBuf buf) throws IOException {

http://git-wip-us.apache.org/repos/asf/drill/blob/125a9271/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
new file mode 100644
index 0000000..775be02
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex.fn;
+
+import com.google.common.collect.Lists;
+import org.apache.drill.common.expression.PathSegment;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.List;
+
+public class JsonReaderUtils {
+
+  public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer,
+                                    Collection<SchemaPath> columns,
+                                    boolean allTextMode,
+                                    List<BaseWriter.ListWriter> 
emptyArrayWriters) {
+
+    List<BaseWriter.MapWriter> writerList = Lists.newArrayList();
+    List<PathSegment> fieldPathList = Lists.newArrayList();
+    BitSet emptyStatus = new BitSet(columns.size());
+    int i = 0;
+
+    // first pass: collect which fields are empty
+    for (SchemaPath sp : columns) {
+      PathSegment fieldPath = sp.getRootSegment();
+      BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
+      while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
+        fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
+        fieldPath = fieldPath.getChild();
+      }
+      writerList.add(fieldWriter);
+      fieldPathList.add(fieldPath);
+      if (fieldWriter.isEmptyMap()) {
+        emptyStatus.set(i, true);
+      }
+      if (i == 0 && !allTextMode) {
+        // when allTextMode is false, there is not much benefit to producing 
all
+        // the empty fields; just produce 1 field. The reason is that the type 
of the
+        // fields is unknown, so if we produce multiple Integer fields by 
default, a
+        // subsequent batch that contains non-integer fields will error out in 
any case.
+        // Whereas, with allTextMode true, we are sure that all fields are 
going to be
+        // treated as varchar, so it makes sense to produce all the fields, 
and in fact
+        // is necessary in order to avoid schema change exceptions by 
downstream operators.
+        break;
+      }
+      i++;
+    }
+
+    // second pass: create default typed vectors corresponding to empty fields
+    // Note: this is not easily do-able in 1 pass because the same fieldWriter
+    // may be shared by multiple fields whereas we want to keep track of all 
fields
+    // independently, so we rely on the emptyStatus.
+    for (int j = 0; j < fieldPathList.size(); j++) {
+      BaseWriter.MapWriter fieldWriter = writerList.get(j);
+      PathSegment fieldPath = fieldPathList.get(j);
+      if (emptyStatus.get(j)) {
+        if (allTextMode) {
+          fieldWriter.varChar(fieldPath.getNameSegment().getPath());
+        } else {
+          fieldWriter.integer(fieldPath.getNameSegment().getPath());
+        }
+      }
+    }
+
+    for (BaseWriter.ListWriter field : emptyArrayWriters) {
+      // checks that array has not been initialized
+      if (field.getValueCapacity() == 0) {
+        if (allTextMode) {
+          field.varChar();
+        } else {
+          field.integer();
+        }
+      }
+    }
+  }
+}
\ No newline at end of file

[1/2] drill git commit: DRILL-5864: Selecting a non-existing field from a MapR-DB JSON table fails with NPE.

Reply via email to