[ 
https://issues.apache.org/jira/browse/DRILL-3353?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16498293#comment-16498293
 ] 

ASF GitHub Bot commented on DRILL-3353:
---------------------------------------

ilooner closed pull request #86: DRILL-3353: Fix dropping nested fields
URL: https://github.com/apache/drill/pull/86
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/exec/java-exec/src/main/codegen/templates/TypeHelper.java 
b/exec/java-exec/src/main/codegen/templates/TypeHelper.java
index d6ccd3a9bf..9c66cb718d 100644
--- a/exec/java-exec/src/main/codegen/templates/TypeHelper.java
+++ b/exec/java-exec/src/main/codegen/templates/TypeHelper.java
@@ -91,10 +91,10 @@ public static SqlAccessor getSqlAccessor(ValueVector 
vector){
     throw new UnsupportedOperationException(buildErrorMessage("find sql 
accessor", type));
   }
   
-  public static ValueVector getNewVector(SchemaPath parentPath, String name, 
BufferAllocator allocator, MajorType type){
+  public static ValueVector getNewVector(SchemaPath parentPath, String name, 
BufferAllocator allocator, MajorType type, CallBack callback){
     SchemaPath child = parentPath.getChild(name);
     MaterializedField field = MaterializedField.create(child, type);
-    return getNewVector(field, allocator);
+    return getNewVector(field, allocator, callback);
   }
   
   
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/OutputMutator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/OutputMutator.java
index 0fe79d90c8..e109ec07fa 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/OutputMutator.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/OutputMutator.java
@@ -21,6 +21,7 @@
 
 import org.apache.drill.exec.exception.SchemaChangeException;
 import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.util.CallBack;
 import org.apache.drill.exec.vector.ValueVector;
 
 /**
@@ -61,4 +62,10 @@
    * @return A DrillBuf that will be released at the end of the current query 
(and can be resized as desired during use).
    */
   public DrillBuf getManagedBuffer();
+
+  /**
+   *
+   * @return the CallBack object for this mutator
+   */
+  public CallBack getCallBack();
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
index 6bf1280ae0..fa454b7826 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
@@ -51,6 +51,7 @@
 import org.apache.drill.exec.store.RecordReader;
 import org.apache.drill.exec.testing.ControlsInjector;
 import org.apache.drill.exec.testing.ControlsInjectorFactory;
+import org.apache.drill.exec.util.CallBack;
 import org.apache.drill.exec.vector.AllocationHelper;
 import org.apache.drill.exec.vector.NullableVarCharVector;
 import org.apache.drill.exec.vector.SchemaChangeCallBack;
@@ -210,6 +211,11 @@ public IterOutcome next() {
 
       populatePartitionVectors();
 
+      for (VectorWrapper w : container) {
+        w.getValueVector().getMutator().setValueCount(recordCount);
+      }
+
+
       // this is a slight misuse of this metric but it will allow Readers to 
report how many records they generated.
       final boolean isNewSchema = mutator.isNewSchema();
       oContext.getStats().batchReceived(0, getRecordCount(), isNewSchema);
@@ -344,6 +350,11 @@ public boolean isNewSchema() {
     public DrillBuf getManagedBuffer() {
       return oContext.getManagedBuffer();
     }
+
+    @Override
+    public CallBack getCallBack() {
+      return callBack;
+    }
   }
 
   @Override
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/TopN/TopNBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/TopN/TopNBatch.java
index 516b0282fb..10f1d7fbf7 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/TopN/TopNBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/TopN/TopNBatch.java
@@ -276,7 +276,7 @@ private void purge() throws SchemaChangeException {
     SimpleRecordBatch batch = new SimpleRecordBatch(c, selectionVector4, 
context);
     SimpleRecordBatch newBatch = new SimpleRecordBatch(newContainer, null, 
context);
     if (copier == null) {
-      copier = RemovingRecordBatch.getGenerated4Copier(batch, context, 
oContext.getAllocator(),  newContainer, newBatch);
+      copier = RemovingRecordBatch.getGenerated4Copier(batch, context, 
oContext.getAllocator(),  newContainer, newBatch, null);
     } else {
       for (VectorWrapper<?> i : batch) {
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/filter/FilterRecordBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/filter/FilterRecordBatch.java
index 5eee9dfe5f..c1d78c3746 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/filter/FilterRecordBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/filter/FilterRecordBatch.java
@@ -193,7 +193,7 @@ protected Filterer generateSV2Filterer() throws 
SchemaChangeException {
     cg.addExpr(new ReturnValueExpression(expr));
 
     for (VectorWrapper<?> v : incoming) {
-      TransferPair pair = 
v.getValueVector().makeTransferPair(container.addOrGet(v.getField()));
+      TransferPair pair = 
v.getValueVector().makeTransferPair(container.addOrGet(v.getField(), callBack));
       transfers.add(pair);
     }
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/limit/LimitRecordBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/limit/LimitRecordBatch.java
index d9330ea753..4ea5a5cc6d 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/limit/LimitRecordBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/limit/LimitRecordBatch.java
@@ -63,7 +63,7 @@ protected boolean setupNewSchema() throws 
SchemaChangeException {
 
 
     for(VectorWrapper<?> v : incoming){
-      TransferPair pair = 
v.getValueVector().makeTransferPair(container.addOrGet(v.getField()));
+      TransferPair pair = 
v.getValueVector().makeTransferPair(container.addOrGet(v.getField(), callBack));
       transfers.add(pair);
     }
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/ProjectRecordBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/ProjectRecordBatch.java
index b6e5dc0ba1..5b5c90d47f 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/ProjectRecordBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/ProjectRecordBatch.java
@@ -325,7 +325,7 @@ protected boolean setupNewSchema() throws 
SchemaChangeException {
                 continue;
               }
               final FieldReference ref = new FieldReference(name);
-              final ValueVector vvOut = 
container.addOrGet(MaterializedField.create(ref, vvIn.getField().getType()));
+              final ValueVector vvOut = 
container.addOrGet(MaterializedField.create(ref, vvIn.getField().getType()), 
callBack);
               final TransferPair tp = vvIn.makeTransferPair(vvOut);
               transfers.add(tp);
             }
@@ -399,7 +399,7 @@ protected boolean setupNewSchema() throws 
SchemaChangeException {
         Preconditions.checkNotNull(incoming);
 
         final FieldReference ref = getRef(namedExpression);
-        final ValueVector vvOut = 
container.addOrGet(MaterializedField.create(ref, vectorRead.getMajorType()));
+        final ValueVector vvOut = 
container.addOrGet(MaterializedField.create(ref, vectorRead.getMajorType()), 
callBack);
         final TransferPair tp = vvIn.makeTransferPair(vvOut);
         transfers.add(tp);
         transferFieldIds.add(vectorRead.getFieldId().getFieldIds()[0]);
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/RemovingRecordBatch.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/RemovingRecordBatch.java
index 57e7b55d80..b5b1b0afe2 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/RemovingRecordBatch.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/svremover/RemovingRecordBatch.java
@@ -34,7 +34,9 @@
 import org.apache.drill.exec.record.VectorContainer;
 import org.apache.drill.exec.record.VectorWrapper;
 import org.apache.drill.exec.record.WritableBatch;
+import org.apache.drill.exec.util.CallBack;
 import org.apache.drill.exec.vector.CopyUtil;
+import org.apache.drill.exec.vector.SchemaChangeCallBack;
 import org.apache.drill.exec.vector.ValueVector;
 
 import com.google.common.base.Preconditions;
@@ -194,7 +196,7 @@ public void close(){
     @Override
     public void setupRemover(FragmentContext context, RecordBatch incoming, 
RecordBatch outgoing){
       for(VectorWrapper<?> vv : incoming){
-        TransferPair tp = 
vv.getValueVector().makeTransferPair(container.addOrGet(vv.getField()));
+        TransferPair tp = 
vv.getValueVector().makeTransferPair(container.addOrGet(vv.getField(), 
callBack));
         pairs.add(tp);
       }
     }
@@ -220,7 +222,7 @@ private Copier getGenerated2Copier() throws 
SchemaChangeException{
     Preconditions.checkArgument(incoming.getSchema().getSelectionVectorMode() 
== SelectionVectorMode.TWO_BYTE);
 
     for(VectorWrapper<?> vv : incoming){
-      TransferPair tp = 
vv.getValueVector().makeTransferPair(container.addOrGet(vv.getField()));
+      TransferPair tp = 
vv.getValueVector().makeTransferPair(container.addOrGet(vv.getField(), 
callBack));
     }
 
     try {
@@ -237,14 +239,14 @@ private Copier getGenerated2Copier() throws 
SchemaChangeException{
 
   private Copier getGenerated4Copier() throws SchemaChangeException {
     Preconditions.checkArgument(incoming.getSchema().getSelectionVectorMode() 
== SelectionVectorMode.FOUR_BYTE);
-    return getGenerated4Copier(incoming, context, oContext.getAllocator(), 
container, this);
+    return getGenerated4Copier(incoming, context, oContext.getAllocator(), 
container, this, callBack);
   }
 
-  public static Copier getGenerated4Copier(RecordBatch batch, FragmentContext 
context, BufferAllocator allocator, VectorContainer container, RecordBatch 
outgoing) throws SchemaChangeException{
+  public static Copier getGenerated4Copier(RecordBatch batch, FragmentContext 
context, BufferAllocator allocator, VectorContainer container, RecordBatch 
outgoing, SchemaChangeCallBack callBack) throws SchemaChangeException{
 
     for(VectorWrapper<?> vv : batch){
       ValueVector v = vv.getValueVectors()[0];
-      v.makeTransferPair(container.addOrGet(v.getField()));
+      v.makeTransferPair(container.addOrGet(v.getField(), callBack));
     }
 
     try {
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
index 12b15a9faf..f118535bcf 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
@@ -143,9 +143,22 @@ public void init(Map<String, String> writerOptions) throws 
IOException {
     enableDictionary = 
Boolean.parseBoolean(writerOptions.get(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
   }
 
+  private boolean containsComplexVectors(BatchSchema schema) {
+    for (MaterializedField field : schema) {
+      MinorType type = field.getType().getMinorType();
+      switch (type) {
+      case MAP:
+      case LIST:
+        return true;
+      default:
+      }
+    }
+    return false;
+  }
+
   @Override
   public void updateSchema(VectorAccessible batch) throws IOException {
-    if (this.batchSchema == null || 
!this.batchSchema.equals(batch.getSchema())) {
+    if (this.batchSchema == null || 
!this.batchSchema.equals(batch.getSchema()) || 
containsComplexVectors(this.batchSchema)) {
       if (this.batchSchema != null) {
         flush();
       }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
index 1df4b81e64..efba46dcb1 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
@@ -41,9 +41,10 @@
   private final MapWithOrdinal<String, ValueVector> vectors =  new 
MapWithOrdinal<>();
 
   protected AbstractMapVector(MaterializedField field, BufferAllocator 
allocator, CallBack callBack) {
-    super(field, allocator, callBack);
+    super(field.clone(), allocator, callBack);
+    MaterializedField clonedField = field.clone();
     // create the hierarchy of the child vectors based on the materialized 
field
-    for (MaterializedField child : field.getChildren()) {
+    for (MaterializedField child : clonedField.getChildren()) {
       if (!child.equals(BaseRepeatedValueVector.OFFSETS_FIELD)) {
         String fieldName = child.getLastName();
         ValueVector v = TypeHelper.getNewVector(child, allocator, callBack);
@@ -116,7 +117,7 @@ public boolean allocateNewSafe() {
       create = true;
     }
     if (create) {
-      final T vector = (T) TypeHelper.getNewVector(field.getPath(), name, 
allocator, type);
+      final T vector = (T) TypeHelper.getNewVector(field.getPath(), name, 
allocator, type, callBack);
       putChild(name, vector);
       if (callBack!=null) {
         callBack.doWork();
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
index 3032aacadb..1e30ea2117 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
@@ -161,6 +161,8 @@ protected MapTransferPair(MapVector from, MapVector to, 
boolean allocate) {
       this.from = from;
       this.to = to;
       this.pairs = new TransferPair[from.size()];
+      this.to.ephPair = null;
+      this.to.ephPair2 = null;
 
       int i = 0;
       ValueVector vector;
@@ -294,9 +296,12 @@ public Mutator getMutator() {
     public Object getObject(int index) {
       Map<String, Object> vv = new JsonStringHashMap();
       for (String child:getChildFieldNames()) {
-        Object value = getChild(child).getAccessor().getObject(index);
-        if (value != null) {
-          vv.put(child, value);
+        ValueVector v = getChild(child);
+        if (v != null) {
+          Object value = v.getAccessor().getObject(index);
+          if (value != null) {
+            vv.put(child, value);
+          }
         }
       }
       return vv;
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
index 97f5b39663..644e5db67e 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
@@ -326,6 +326,7 @@ public RepeatedMapTransferPair(RepeatedMapVector from, 
RepeatedMapVector to, boo
       this.from = from;
       this.to = to;
       this.pairs = new TransferPair[from.size()];
+      this.to.ephPair = null;
 
       int i = 0;
       ValueVector vector;
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/impl/VectorContainerWriter.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/impl/VectorContainerWriter.java
index 6b6ab46a1b..5aea0ca50c 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/impl/VectorContainerWriter.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/impl/VectorContainerWriter.java
@@ -21,6 +21,7 @@
 import org.apache.drill.exec.exception.SchemaChangeException;
 import org.apache.drill.exec.physical.impl.OutputMutator;
 import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.util.CallBack;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.vector.complex.MapVector;
 import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
@@ -35,7 +36,7 @@
   public VectorContainerWriter(OutputMutator mutator) {
     super(null);
     this.mutator = mutator;
-    this.mapVector = new SpecialMapVector();
+    this.mapVector = new SpecialMapVector(mutator.getCallBack());
     this.mapRoot = new SingleMapWriter(mapVector, this);
   }
 
@@ -81,8 +82,8 @@ public void allocate() {
 
   private class SpecialMapVector extends MapVector {
 
-    public SpecialMapVector() {
-      super("", null, null);
+    public SpecialMapVector(CallBack callback) {
+      super("", null, callback);
     }
 
     @Override
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestOutputMutator.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestOutputMutator.java
index 0509b7b15b..e3591b660b 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestOutputMutator.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/TestOutputMutator.java
@@ -30,6 +30,7 @@
 import org.apache.drill.exec.record.MaterializedField;
 import org.apache.drill.exec.record.VectorContainer;
 import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.util.CallBack;
 import org.apache.drill.exec.vector.ValueVector;
 
 import com.google.common.collect.Maps;
@@ -92,4 +93,9 @@ public DrillBuf getManagedBuffer() {
     return allocator.buffer(255);
   }
 
+  @Override
+  public CallBack getCallBack() {
+    return null;
+  }
+
 }
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
index bb1af9eb2e..c1fb9284ca 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
@@ -18,6 +18,7 @@
 package org.apache.drill.exec.store.json;
 
 import org.apache.drill.BaseTestQuery;
+import org.apache.drill.TestBuilder;
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.exec.proto.UserBitShared;
 import org.junit.Test;
@@ -154,4 +155,22 @@ public void 
testMixedNumberTypesWhenReadingNumbersAsDouble() throws Exception {
       testNoResult("alter session set `store.json.read_numbers_as_double`= 
false");
     }
   }
+
+  @Test
+  public void drill_3353() throws Exception {
+    try {
+      testNoResult("alter session set `store.json.all_text_mode` = true");
+      test("create table dfs_test.tmp.drill_3353 as select a from 
dfs.`${WORKING_PATH}/src/test/resources/jsoninput/drill_3353` where e = true");
+      String query = "select t.a.d cnt from dfs_test.tmp.drill_3353 t where 
t.a.d is not null";
+      test(query);
+      testBuilder()
+          .sqlQuery(query)
+          .unOrdered()
+          .baselineColumns("cnt")
+          .baselineValues("1")
+          .go();
+    } finally {
+      testNoResult("alter session set `store.json.all_text_mode` = false");
+    }
+  }
 }
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
index 61380cf09b..af1b8960c8 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/ParquetRecordReaderTest.java
@@ -55,6 +55,7 @@
 import org.apache.drill.exec.store.CachedSingleFileSystem;
 import org.apache.drill.exec.store.TestOutputMutator;
 import org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader;
+import org.apache.drill.exec.util.CallBack;
 import org.apache.drill.exec.vector.BigIntVector;
 import org.apache.drill.exec.vector.NullableBigIntVector;
 import org.apache.drill.exec.vector.ValueVector;
@@ -363,7 +364,12 @@ public boolean isNewSchema() {
     public DrillBuf getManagedBuffer() {
       return allocator.buffer(255);
     }
-  }
+
+   @Override
+   public CallBack getCallBack() {
+     return null;
+   }
+ }
 
   private void validateFooters(final List<Footer> metadata) {
     logger.debug(metadata.toString());
diff --git a/exec/java-exec/src/test/resources/jsoninput/drill_3353/a.json 
b/exec/java-exec/src/test/resources/jsoninput/drill_3353/a.json
new file mode 100644
index 0000000000..0ffb7d3e4b
--- /dev/null
+++ b/exec/java-exec/src/test/resources/jsoninput/drill_3353/a.json
@@ -0,0 +1,3 @@
+{ a : { b : 1, c : 1 }, e : false } 
+{ a : { b : 1, c : 1 }, e : false } 
+{ a : { b : 1, c : 1 }, e : true  } 
diff --git a/exec/java-exec/src/test/resources/jsoninput/drill_3353/b.json 
b/exec/java-exec/src/test/resources/jsoninput/drill_3353/b.json
new file mode 100644
index 0000000000..56a4b3b437
--- /dev/null
+++ b/exec/java-exec/src/test/resources/jsoninput/drill_3353/b.json
@@ -0,0 +1,3 @@
+{ a : { b : 1, d : 1 }, e : false } 
+{ a : { b : 1, d : 1 }, e : false } 
+{ a : { b : 1, d : 1 }, e : true  } 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Non data-type related schema changes errors
> -------------------------------------------
>
>                 Key: DRILL-3353
>                 URL: https://issues.apache.org/jira/browse/DRILL-3353
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - JSON
>    Affects Versions: 1.0.0
>            Reporter: Oscar Bernal
>            Assignee: Steven Phillips
>            Priority: Major
>             Fix For: 1.5.0
>
>         Attachments: i-bfbc0a5c-ios-PulsarEvent-2015-06-23_19.json.zip
>
>
> I'm having trouble querying a data set with varying schema for nested 
> object fields. The majority of my data for a specific type of record has the 
> following nested data:
> {code}
> "attributes":{"daysSinceInstall":0,"destination":"none","logged":"no","nth":1,"type":"organic","wearable":"no"}}
> {code}
> Among those records (hundreds of them) I have only two with a slightly 
> different schema:
> {code}
> "attributes":{"adSet":"Teste-Adwords-Engagement-Branch-iOS-230615-adset","campaign":"Teste-Adwords-Engagement-Branch-iOS-230615","channel":"Adwords","daysSinceInstall":0,"destination":"none","logged":"no","nth":4,"type":"branch","wearable":"no"}}
> {code}
> When trying to query the "new" fields, my queries fail:
> With {code:sql}ALTER SYSTEM SET `store.json.all_text_mode` = true;{code}
> {noformat}
> 0: jdbc:drill:zk=local> select log.event.attributes from 
> `dfs`.`root`.`/file.json` as log where log.si = 
> '07A3F985-4B34-4A01-9B83-3B14548EF7BE' and log.event.attributes.ad = 
> 'Teste-FB-Engagement-Puro-iOS-230615';
> Error: SYSTEM ERROR: java.lang.NumberFormatException: 
> Teste-FB-Engagement-Puro-iOS-230615"
> Fragment 0:0
> [Error Id: 22d37a65-7dd0-4661-bbfc-7a50bbee9388 on 
> ip-10-0-1-16.sa-east-1.compute.internal:31010] (state=,code=0)
> {noformat}
> With {code:sql}ALTER SYSTEM SET `store.json.all_text_mode` = false;`{code}
> {noformat}
> 0: jdbc:drill:zk=local> select log.event.attributes from 
> `dfs`.`root`.`/file.json` as log where log.si = 
> '07A3F985-4B34-4A01-9B83-3B14548EF7BE';
> Error: DATA_READ ERROR: Error parsing JSON - You tried to write a Bit type 
> when you are using a ValueWriter of type NullableVarCharWriterImpl.
> File  file.json
> Record  35
> Fragment 0:0
> [Error Id: 5746e3e9-48c0-44b1-8e5f-7c94e7c64d0f on 
> ip-10-0-1-16.sa-east-1.compute.internal:31010] (state=,code=0)
> {noformat}
> If I try to extract all "attributes" from those events, Drill will only 
> return a subset of the fields, ignoring the others. 
> {noformat}
> 0: jdbc:drill:zk=local> select log.event.attributes from 
> `dfs`.`root`.`/file.json` as log where log.si = 
> '07A3F985-4B34-4A01-9B83-3B14548EF7BE' and log.type ='Opens App';
> +----------------------------------------------------+
> |                       EXPR$0                       |
> +----------------------------------------------------+
> | {"logged":"no","wearable":"no","type":"xxxx"}   |
> | {"logged":"no","wearable":"no","type":"xxxx"}  |
> | {"logged":"no","wearable":"no","type":"xxxx"}  |
> | {"logged":"no","wearable":"no","type":"xxxx"}    |
> | {"logged":"no","wearable":"no","type":"xxxx"}   |
> +----------------------------------------------------+
> {noformat}
> What I find strange is that I have thousands of records in the same file with 
> different schema for different record types, and all other queries seem to run 
> well.
> Is there something about how Drill infers schema that I might be missing 
> here? Does it infer based on a sample % of the data and fail for records that 
> were not taken into account while inferring schema? I suspect I wouldn't have 
> this error if I had 100's of records with that other schema inside the file, 
> but I can't find anything in the docs or code to support that hypothesis. 
> Perhaps it's just a bug? Is it expected?
> The troubleshooting guide seems to mention something about this, but it is 
> vague, implying only that Drill doesn't fully support schema changes. I thought 
> that applied mostly to data type changes, for which there are other well 
> documented issues.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to