DRILL-5504: Add vector validator to diagnose offset vector issues

Validates offset vectors in VarChar and repeated vectors. Validates the
special case of repeated VarChar vectors (two layers of offsets.)

Provides two new session variables to turn on validation. One enables
the existing operator (iterator) validation, the other adds vector
validation. This allows validation to occur in a “production” Drill
(without restarting Drill with assertions, as previously required.)

Unit tests validate the validator. Another test validates the
integration, but requires manual steps, so is ignored by default.

This version is first-cut: all work is done within a single class.
Allows back-porting to an earlier version to solve a specific issues. A
revision should move some of the work into generated code (or refactor
vectors to allow outside access), since offset vectors appear for each
subclass; not on a base class that would allow generic operations.

* Added boot-time options to allow enabling vector validation in Maven
unit tests.
* Code cleanup per suggestions.
* Additional (manual) tests for boot-time options and default options.

closes #832


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d7bc213b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d7bc213b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d7bc213b

Branch: refs/heads/master
Commit: d7bc213ba7cf9a49657cb0699540ca375014a828
Parents: 7873988
Author: Paul Rogers <prog...@maprtech.com>
Authored: Thu May 11 12:46:15 2017 -0700
Committer: Jinfeng Ni <j...@apache.org>
Committed: Fri Jun 2 21:43:14 2017 -0700

----------------------------------------------------------------------
 .../org/apache/drill/exec/ExecConstants.java    |  31 +-
 .../drill/exec/physical/impl/ImplCreator.java   |  12 +-
 .../physical/impl/validate/BatchValidator.java  | 208 ++++++++++++
 .../IteratorValidatorBatchIterator.java         |  20 +-
 .../impl/validate/IteratorValidatorCreator.java |  12 +-
 .../server/options/SystemOptionManager.java     |   4 +-
 .../compliant/CompliantTextRecordReader.java    |   1 +
 .../src/main/resources/drill-module.conf        |  10 +
 .../impl/validate/TestBatchValidator.java       | 323 +++++++++++++++++++
 .../impl/validate/TestValidationOptions.java    | 135 ++++++++
 10 files changed, 749 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java 
b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 007e39a..83ffb20 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -447,11 +447,40 @@ public interface ExecConstants {
   String USE_DYNAMIC_UDFS_KEY = "exec.udf.use_dynamic";
   BooleanValidator USE_DYNAMIC_UDFS = new 
BooleanValidator(USE_DYNAMIC_UDFS_KEY, true);
 
-
   String QUERY_TRANSIENT_STATE_UPDATE_KEY = "exec.query.progress.update";
   BooleanValidator QUERY_TRANSIENT_STATE_UPDATE = new 
BooleanValidator(QUERY_TRANSIENT_STATE_UPDATE_KEY, true);
 
   String PERSISTENT_TABLE_UMASK = "exec.persistent_table.umask";
   StringValidator PERSISTENT_TABLE_UMASK_VALIDATOR = new 
StringValidator(PERSISTENT_TABLE_UMASK, "002");
 
+  /**
+   * Enables batch iterator (operator) validation. Validation is normally 
enabled
+   * only when assertions are enabled. This option enables iterator validation 
even
+   * if assertions are not enabled. That is, it allows iterator validation 
even on
+   * a "production" Drill instance.
+   */
+  String ENABLE_ITERATOR_VALIDATION_OPTION = "debug.validate_iterators";
+  BooleanValidator ENABLE_ITERATOR_VALIDATOR = new 
BooleanValidator(ENABLE_ITERATOR_VALIDATION_OPTION, false);
+
+  /**
+   * Boot-time config option to enable validation. Primarily used for tests.
+   * If true, overrrides the above. (That is validation is done if assertions 
are on,
+   * if the above session option is set to true, or if this config option is 
set to true.
+   */
+
+  String ENABLE_ITERATOR_VALIDATION = "drill.exec.debug.validate_iterators";
+
+  /**
+   * When iterator validation is enabled, additionally validates the vectors in
+   * each batch passed to each iterator.
+   */
+  String ENABLE_VECTOR_VALIDATION_OPTION = "debug.validate_vectors";
+  BooleanValidator ENABLE_VECTOR_VALIDATOR = new 
BooleanValidator(ENABLE_VECTOR_VALIDATION_OPTION, false);
+
+  /**
+   * Boot-time config option to enable vector validation. Primarily used for
+   * tests. Add the following to the command line to enable:<br>
+   * <tt>-ea -Ddrill.exec.debug.validate_vectors=true</tt>
+   */
+  String ENABLE_VECTOR_VALIDATION = "drill.exec.debug.validate_vectors";
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
index 5872ef1..58bf383 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ImplCreator.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -26,6 +26,7 @@ import java.util.concurrent.TimeUnit;
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.drill.common.AutoCloseables;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.ops.FragmentContext;
 import org.apache.drill.exec.physical.base.FragmentRoot;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
@@ -69,9 +70,16 @@ public class ImplCreator {
     Preconditions.checkNotNull(root);
     Preconditions.checkNotNull(context);
 
-    if (AssertionUtil.isAssertionsEnabled()) {
+    // Enable iterator (operator) validation if assertions are enabled (debug 
mode)
+    // or if in production mode and the ENABLE_ITERATOR_VALIDATION option is 
set
+    // to true.
+
+    if (AssertionUtil.isAssertionsEnabled() ||
+        
context.getOptionSet().getOption(ExecConstants.ENABLE_ITERATOR_VALIDATOR) ||
+        
context.getConfig().getBoolean(ExecConstants.ENABLE_ITERATOR_VALIDATION)) {
       root = 
IteratorValidatorInjector.rewritePlanWithIteratorValidator(context, root);
     }
+
     final ImplCreator creator = new ImplCreator();
     Stopwatch watch = Stopwatch.createStarted();
 

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
new file mode 100644
index 0000000..e0f3ff2
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/BatchValidator.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ 
******************************************************************************/
+package org.apache.drill.exec.physical.impl.validate;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.record.SimpleVectorWrapper;
+import org.apache.drill.exec.record.VectorAccessible;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.FixedWidthVector;
+import org.apache.drill.exec.vector.NullableVarCharVector;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.RepeatedVarCharVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.VariableWidthVector;
+import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
+import org.apache.drill.exec.vector.complex.RepeatedFixedWidthVectorLike;
+
+
+/**
+ * Validate a batch of value vectors. It is not possible to validate the
+ * data, but we can validate the structure, especially offset vectors.
+ * Only handles single (non-hyper) vectors at present. Current form is
+ * self-contained. Better checks can be done by moving checks inside
+ * vectors or by exposing more metadata from vectors.
+ */
+
+public class BatchValidator {
+  private static final org.slf4j.Logger logger =
+      org.slf4j.LoggerFactory.getLogger(BatchValidator.class);
+
+  public static final int MAX_ERRORS = 100;
+
+  private final int rowCount;
+  private final VectorAccessible batch;
+  private final List<String> errorList;
+  private int errorCount;
+
+  public BatchValidator(VectorAccessible batch) {
+    rowCount = batch.getRecordCount();
+    this.batch = batch;
+    errorList = null;
+  }
+
+  public BatchValidator(VectorAccessible batch, boolean captureErrors) {
+    rowCount = batch.getRecordCount();
+    this.batch = batch;
+    if (captureErrors) {
+      errorList = new ArrayList<>();
+    } else {
+      errorList = null;
+    }
+  }
+
+  public void validate() {
+    if (batch.getRecordCount() == 0) {
+      return;
+    }
+    for (VectorWrapper<? extends ValueVector> w : batch) {
+      validateWrapper(w);
+    }
+  }
+
+  private void validateWrapper(VectorWrapper<? extends ValueVector> w) {
+    if (w instanceof SimpleVectorWrapper) {
+      validateVector(w.getValueVector());
+    }
+  }
+
+  private void validateVector(ValueVector vector) {
+    String name = vector.getField().getName();
+    if (vector instanceof NullableVector) {
+      validateNullableVector(name, (NullableVector) vector);
+    } else if (vector instanceof VariableWidthVector) {
+      validateVariableWidthVector(name, (VariableWidthVector) vector, 
rowCount);
+    } else if (vector instanceof FixedWidthVector) {
+      validateFixedWidthVector(name, (FixedWidthVector) vector);
+    } else if (vector instanceof BaseRepeatedValueVector) {
+      validateRepeatedVector(name, (BaseRepeatedValueVector) vector);
+    } else {
+      logger.debug("Don't know how to validate vector: " + name + " of class " 
+ vector.getClass().getSimpleName());
+    }
+  }
+
+  private void validateVariableWidthVector(String name, VariableWidthVector 
vector, int entryCount) {
+
+    // Offsets are in the derived classes. Handle only VarChar for now.
+
+    if (vector instanceof VarCharVector) {
+      validateVarCharVector(name, (VarCharVector) vector, entryCount);
+    } else {
+      logger.debug("Don't know how to validate vector: " + name + " of class " 
+ vector.getClass().getSimpleName());
+    }
+  }
+
+  private void validateVarCharVector(String name, VarCharVector vector, int 
entryCount) {
+//    int dataLength = vector.getAllocatedByteCount(); // Includes offsets and 
data.
+    int dataLength = vector.getBuffer().capacity();
+    validateOffsetVector(name + "-offsets", vector.getOffsetVector(), 
entryCount, dataLength);
+  }
+
+  private void validateRepeatedVector(String name, BaseRepeatedValueVector 
vector) {
+
+    int dataLength = Integer.MAX_VALUE;
+    if (vector instanceof RepeatedVarCharVector) {
+      dataLength = ((RepeatedVarCharVector) 
vector).getOffsetVector().getValueCapacity();
+    } else if (vector instanceof RepeatedFixedWidthVectorLike) {
+      dataLength = ((BaseDataValueVector) ((BaseRepeatedValueVector) 
vector).getDataVector()).getBuffer().capacity();
+    }
+    int itemCount = validateOffsetVector(name + "-offsets", 
vector.getOffsetVector(), rowCount, dataLength);
+
+    // Special handling of repeated VarChar vectors
+    // The nested data vectors are not quite exactly like top-level vectors.
+
+    @SuppressWarnings("resource")
+    ValueVector dataVector = vector.getDataVector();
+    if (dataVector instanceof VariableWidthVector) {
+      validateVariableWidthVector(name + "-data", (VariableWidthVector) 
dataVector, itemCount);
+    }
+  }
+
+  private int validateOffsetVector(String name, UInt4Vector offsetVector, int 
valueCount, int maxOffset) {
+    if (valueCount == 0) {
+      return 0;
+    }
+    UInt4Vector.Accessor accessor = offsetVector.getAccessor();
+
+    // First value must be zero in current version.
+
+    int prevOffset = accessor.get(0);
+    if (prevOffset != 0) {
+      error(name, offsetVector, "Offset (0) must be 0 but was " + prevOffset);
+    }
+
+    // Note <= comparison: offset vectors have (n+1) entries.
+
+    for (int i = 1; i <= valueCount; i++) {
+      int offset = accessor.get(i);
+      if (offset < prevOffset) {
+        error(name, offsetVector, "Decreasing offsets at (" + (i-1) + ", " + i 
+ ") = (" + prevOffset + ", " + offset + ")");
+      } else if (offset > maxOffset) {
+        error(name, offsetVector, "Invalid offset at index " + i + " = " + 
offset + " exceeds maximum of " + maxOffset);
+      }
+      prevOffset = offset;
+    }
+    return prevOffset;
+  }
+
+  private void error(String name, ValueVector vector, String msg) {
+    if (errorCount == 0) {
+      logger.error("Found one or more vector errors from " + 
batch.getClass().getSimpleName());
+    }
+    errorCount++;
+    if (errorCount >= MAX_ERRORS) {
+      return;
+    }
+    String fullMsg = "Column " + name + " of type " + 
vector.getClass().getSimpleName( ) + ": " + msg;
+    logger.error(fullMsg);
+    if (errorList != null) {
+      errorList.add(fullMsg);
+    }
+  }
+
+  private void validateNullableVector(String name, NullableVector vector) {
+    // Can't validate at this time because the bits vector is in each
+    // generated subtype.
+
+    // Validate a VarChar vector because it is common.
+
+    if (vector instanceof NullableVarCharVector) {
+      @SuppressWarnings("resource")
+      VarCharVector values = ((NullableVarCharVector) 
vector).getValuesVector();
+      validateVarCharVector(name + "-values", values, rowCount);
+    }
+  }
+
+  private void validateFixedWidthVector(String name, FixedWidthVector vector) {
+    // TODO Auto-generated method stub
+
+  }
+
+  /**
+   * Obtain the list of errors. For use in unit-testing this class.
+   * @return the list of errors found, or null if error capture was
+   * not enabled
+   */
+
+  public List<String> errors() { return errorList; }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
index 01c3c92..0d7fccc 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorBatchIterator.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -94,6 +94,11 @@ public class IteratorValidatorBatchIterator implements 
CloseableRecordBatch {
   /** High-level IterOutcome sequence state. */
   private ValidationState validationState = ValidationState.INITIAL_NO_SCHEMA;
 
+  /**
+   * Enable/disable per-batch vector validation. Enable only to debug vector
+   * corruption issues.
+   */
+  private boolean validateBatches;
 
   public IteratorValidatorBatchIterator(RecordBatch incoming) {
     this.incoming = incoming;
@@ -103,6 +108,11 @@ public class IteratorValidatorBatchIterator implements 
CloseableRecordBatch {
     logger.trace( "[#{}; on {}]: Being constructed.", instNum, batchTypeName);
   }
 
+
+  public void enableBatchValidation(boolean option) {
+    validateBatches = option;
+  }
+
   @Override
   public String toString() {
     return
@@ -224,6 +234,7 @@ public class IteratorValidatorBatchIterator implements 
CloseableRecordBatch {
           // above).
           // OK_NEW_SCHEMA moves to have-seen-schema state.
           validationState = ValidationState.HAVE_SCHEMA;
+          validateBatch();
           break;
         case OK:
           // OK is allowed as long as OK_NEW_SCHEMA was seen, except if 
terminated
@@ -234,6 +245,7 @@ public class IteratorValidatorBatchIterator implements 
CloseableRecordBatch {
                     "next() returned %s without first returning %s [#%d, %s]",
                     batchState, OK_NEW_SCHEMA, instNum, batchTypeName));
           }
+          validateBatch();
           // OK doesn't change high-level state.
           break;
         case NONE:
@@ -326,6 +338,12 @@ public class IteratorValidatorBatchIterator implements 
CloseableRecordBatch {
     }
   }
 
+  private void validateBatch() {
+    if (validateBatches) {
+      new BatchValidator(incoming).validate();
+    }
+  }
+
   @Override
   public WritableBatch getWritableBatch() {
     validateReadState("getWritableBatch()");

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
index cc30326..2288419 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/validate/IteratorValidatorCreator.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -20,6 +20,7 @@ package org.apache.drill.exec.physical.impl.validate;
 import java.util.List;
 
 import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.ops.FragmentContext;
 import org.apache.drill.exec.physical.config.IteratorValidator;
 import org.apache.drill.exec.physical.impl.BatchCreator;
@@ -35,6 +36,13 @@ public class IteratorValidatorCreator implements 
BatchCreator<IteratorValidator>
       List<RecordBatch> children)
       throws ExecutionSetupException {
     Preconditions.checkArgument(children.size() == 1);
-    return new IteratorValidatorBatchIterator(children.iterator().next());
+    RecordBatch child = children.iterator().next();
+    IteratorValidatorBatchIterator iter = new 
IteratorValidatorBatchIterator(child);
+    boolean validateBatches = 
context.getOptionSet().getOption(ExecConstants.ENABLE_VECTOR_VALIDATOR) ||
+                              
context.getConfig().getBoolean(ExecConstants.ENABLE_VECTOR_VALIDATION);
+    iter.enableBatchValidation(validateBatches);
+    logger.trace("Iterator validation enabled for " + 
child.getClass().getSimpleName() +
+                 (validateBatches ? " with vector validation" : ""));
+    return iter;
   }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 8d0e96c..4f7ecc2 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -170,7 +170,9 @@ public class SystemOptionManager extends BaseOptionManager 
implements OptionMana
       ExecConstants.QUERY_PROFILE_DEBUG_VALIDATOR,
       ExecConstants.USE_DYNAMIC_UDFS,
       ExecConstants.QUERY_TRANSIENT_STATE_UPDATE,
-      ExecConstants.PERSISTENT_TABLE_UMASK_VALIDATOR
+      ExecConstants.PERSISTENT_TABLE_UMASK_VALIDATOR,
+      ExecConstants.ENABLE_ITERATOR_VALIDATOR,
+      ExecConstants.ENABLE_VECTOR_VALIDATOR
     };
     final Map<String, OptionValidator> tmp = new HashMap<>();
     for (final OptionValidator validator : validators) {

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index e253730..4a35c3b 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -151,6 +151,7 @@ public class CompliantTextRecordReader extends 
AbstractRecordReader {
       }
 
       // setup Input using InputStream
+      logger.trace("Opening file {}", split.getPath());
       stream = dfs.openPossiblyCompressedStream(split.getPath());
       input = new TextInput(settings, stream, readBuffer, split.getStart(), 
split.getStart() + split.getLength());
 

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/main/resources/drill-module.conf
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/resources/drill-module.conf 
b/exec/java-exec/src/main/resources/drill-module.conf
index 19e1b1f..7c095ac 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -187,6 +187,16 @@ drill.exec: {
     // Use plain Java compilation where available
     prefer_plain_java: false
   },
+  debug: {
+    // If true, inserts the iterator validator atop each operator.
+    // Primrily used for testing.
+    validate_iterators: false,
+    // If iterator validation is enabled, also validates the vectors
+    // in each batch. Primarily used for testing. To enable from
+    // the command line:
+    // java ... -ea -Ddrill.exec.debug.validate_vectors=true ...
+    validate_vectors: false
+  },
   sort: {
     purge.threshold : 1000,
     external: {

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
new file mode 100644
index 0000000..eafb4c8
--- /dev/null
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ 
******************************************************************************/
+package org.apache.drill.exec.physical.impl.validate;
+
+import static org.junit.Assert.*;
+
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.impl.validate.BatchValidator;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.VectorAccessible;
+import org.apache.drill.exec.vector.RepeatedVarCharVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.test.LogFixture;
+import org.apache.drill.test.OperatorFixture;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import ch.qos.logback.classic.Level;
+
+public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
+
+  protected static OperatorFixture fixture;
+  protected static LogFixture logFixture;
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    logFixture = LogFixture.builder()
+        .toConsole()
+        .logger(BatchValidator.class, Level.TRACE)
+        .build();
+    fixture = OperatorFixture.standardFixture();
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    fixture.close();
+    logFixture.close();
+  }
+
+  @Test
+  public void testValidFixed() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.INT)
+        .addNullable("b", MinorType.INT)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add(10, 100)
+        .add(20, 120)
+        .add(30, null)
+        .add(40, 140)
+        .build();
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    assertTrue(validator.errors().isEmpty());
+    batch.clear();
+  }
+
+  @Test
+  public void testValidVariable() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR)
+        .addNullable("b", MinorType.VARCHAR)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add("col1.1", "col1.2")
+        .add("col2.1", "col2.2")
+        .add("col3.1", null)
+        .add("col4.1", "col4.2")
+        .build();
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    assertTrue(validator.errors().isEmpty());
+    batch.clear();
+  }
+
+  @Test
+  public void testValidRepeated() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.INT, DataMode.REPEATED)
+        .add("b", MinorType.VARCHAR, DataMode.REPEATED)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add(new int[] {}, new String[] {})
+        .add(new int[] {1, 2, 3}, new String[] {"fred", "barney", "wilma"})
+        .add(new int[] {4}, new String[] {"dino"})
+        .build();
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    assertTrue(validator.errors().isEmpty());
+    batch.clear();
+  }
+
+  @Test
+  public void testVariableMissingLast() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add("x")
+        .add("y")
+        .add("z")
+        .build();
+
+    // Here we are evil: stomp on the last offset to simulate corruption.
+    // Don't do this in real code!
+
+    VectorAccessible va = batch.vectorAccessible();
+    @SuppressWarnings("resource")
+    ValueVector v = va.iterator().next().getValueVector();
+    VarCharVector vc = (VarCharVector) v;
+    @SuppressWarnings("resource")
+    UInt4Vector ov = vc.getOffsetVector();
+    assertTrue(ov.getAccessor().get(3) > 0);
+    ov.getMutator().set(3, 0);
+
+    // Validator should catch the error.
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Decreasing offsets"));
+    batch.clear();
+  }
+
+  @Test
+  public void testVariableCorruptFirst() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add("x")
+        .add("y")
+        .add("z")
+        .build();
+
+    zapOffset(batch, 0, 1);
+
+    // Validator should catch the error.
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Offset (0) must be 0"));
+    batch.clear();
+  }
+
+  public void zapOffset(SingleRowSet batch, int index, int bogusValue) {
+
+    // Here we are evil: stomp on an offset to simulate corruption.
+    // Don't do this in real code!
+
+    VectorAccessible va = batch.vectorAccessible();
+    @SuppressWarnings("resource")
+    ValueVector v = va.iterator().next().getValueVector();
+    VarCharVector vc = (VarCharVector) v;
+    @SuppressWarnings("resource")
+    UInt4Vector ov = vc.getOffsetVector();
+    ov.getMutator().set(index, bogusValue);
+  }
+
+  @Test
+  public void testVariableCorruptMiddleLow() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add("xx")
+        .add("yy")
+        .add("zz")
+        .build();
+
+    zapOffset(batch, 2, 1);
+
+    // Validator should catch the error.
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Decreasing offsets"));
+    batch.clear();
+  }
+
+  @Test
+  public void testVariableCorruptMiddleHigh() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add("xx")
+        .add("yy")
+        .add("zz")
+        .build();
+
+    zapOffset(batch, 1, 10);
+
+    // Validator should catch the error.
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Decreasing offsets"));
+    batch.clear();
+  }
+
+  @Test
+  public void testVariableCorruptLastOutOfRange() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add("xx")
+        .add("yy")
+        .add("zz")
+        .build();
+
+    zapOffset(batch, 3, 100_000);
+
+    // Validator should catch the error.
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Invalid offset"));
+    batch.clear();
+  }
+
+  @Test
+  public void testRepeatedBadArrayOffset() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR, DataMode.REPEATED)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add((Object) new String[] {})
+        .add((Object) new String[] {"fred", "barney", "wilma"})
+        .add((Object) new String[] {"dino"})
+        .build();
+
+    VectorAccessible va = batch.vectorAccessible();
+    @SuppressWarnings("resource")
+    ValueVector v = va.iterator().next().getValueVector();
+    RepeatedVarCharVector vc = (RepeatedVarCharVector) v;
+    @SuppressWarnings("resource")
+    UInt4Vector ov = vc.getOffsetVector();
+    ov.getMutator().set(3, 1);
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Decreasing offsets"));
+    batch.clear();
+  }
+
+  @Test
+  public void testRepeatedBadValueOffset() {
+    BatchSchema schema = new SchemaBuilder()
+        .add("a", MinorType.VARCHAR, DataMode.REPEATED)
+        .build();
+
+    SingleRowSet batch = fixture.rowSetBuilder(schema)
+        .add((Object) new String[] {})
+        .add((Object) new String[] {"fred", "barney", "wilma"})
+        .add((Object) new String[] {"dino"})
+        .build();
+
+    VectorAccessible va = batch.vectorAccessible();
+    @SuppressWarnings("resource")
+    ValueVector v = va.iterator().next().getValueVector();
+    RepeatedVarCharVector rvc = (RepeatedVarCharVector) v;
+    @SuppressWarnings("resource")
+    VarCharVector vc = rvc.getDataVector();
+    @SuppressWarnings("resource")
+    UInt4Vector ov = vc.getOffsetVector();
+    ov.getMutator().set(4, 100_000);
+
+    BatchValidator validator = new BatchValidator(batch.vectorAccessible(), 
true);
+    validator.validate();
+    List<String> errors = validator.errors();
+    assertEquals(1, errors.size());
+    assertTrue(errors.get(0).contains("Invalid offset"));
+    batch.clear();
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/d7bc213b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
new file mode 100644
index 0000000..d4e33b0
--- /dev/null
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestValidationOptions.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.validate;
+
+import static org.junit.Assert.assertFalse;
+
+import org.apache.drill.exec.ExecConstants;
+import 
org.apache.drill.exec.store.easy.text.compliant.CompliantTextRecordReader;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.DrillTest;
+import org.apache.drill.test.FixtureBuilder;
+import org.apache.drill.test.LogFixture;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import ch.qos.logback.classic.Level;
+
+@Ignore("requires manual verification")
+public class TestValidationOptions extends DrillTest {
+
+  protected static LogFixture logFixture;
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    logFixture = LogFixture.builder()
+        .toConsole()
+        .logger(BatchValidator.class, Level.TRACE)
+        .logger(IteratorValidatorCreator.class, Level.TRACE)
+        .logger(CompliantTextRecordReader.class, Level.TRACE)
+        .build();
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    logFixture.close();
+  }
+
+  // To validate these tests, set breakpoints in ImplCreator
+  // and IteratorValidatorBatchIterator to see if the options
+  // work as expected.
+
+  @Test
+  public void testOptions() throws Exception {
+    FixtureBuilder builder = ClusterFixture.builder()
+        .maxParallelization(1)
+        .configProperty(ExecConstants.ENABLE_ITERATOR_VALIDATION, false)
+        .configProperty(ExecConstants.ENABLE_VECTOR_VALIDATION, false)
+        .sessionOption(ExecConstants.ENABLE_ITERATOR_VALIDATION_OPTION, true)
+        .sessionOption(ExecConstants.ENABLE_VECTOR_VALIDATION_OPTION, true)
+        ;
+    try (ClusterFixture cluster = builder.build();
+         ClientFixture client = cluster.clientFixture()) {
+
+      boolean hasAssertions = false;
+      assert hasAssertions = true;
+      assertFalse(hasAssertions);
+      String sql = "SELECT id_i, name_s10 FROM `mock`.`customers_10`";
+      client.queryBuilder().sql(sql).run();
+
+      client.alterSession(ExecConstants.ENABLE_VECTOR_VALIDATION, false);
+      client.queryBuilder().sql(sql).run();
+
+      client.alterSession(ExecConstants.ENABLE_ITERATOR_VALIDATION, false);
+      client.queryBuilder().sql(sql).run();
+    }
+  }
+
+  /**
+   * Config options override session options. Config options allow passing in
+   * the setting at run time on the command line. This is a work-around for the
+   * fact that the config system has no generic solution at present.
+   *
+   * @throws Exception if anything goes wrong
+   */
+
+  @Test
+  public void testConfig() throws Exception {
+    FixtureBuilder builder = ClusterFixture.builder()
+        .maxParallelization(1)
+        .configProperty(ExecConstants.ENABLE_ITERATOR_VALIDATION, true)
+        .configProperty(ExecConstants.ENABLE_VECTOR_VALIDATION, true)
+        .sessionOption(ExecConstants.ENABLE_ITERATOR_VALIDATION_OPTION, false)
+        .sessionOption(ExecConstants.ENABLE_VECTOR_VALIDATION_OPTION, false)
+        ;
+    try (ClusterFixture cluster = builder.build();
+         ClientFixture client = cluster.clientFixture()) {
+
+      boolean hasAssertions = false;
+      assert hasAssertions = true;
+      assertFalse(hasAssertions);
+      String sql = "SELECT id_i, name_s10 FROM `mock`.`customers_10`";
+      client.queryBuilder().sql(sql).run();
+    }
+  }
+
+  /**
+   * Should do no validation with all-default options.
+   *
+   * @throws Exception
+   */
+
+  @Test
+  public void testDefaults() throws Exception {
+    FixtureBuilder builder = ClusterFixture.builder()
+        .maxParallelization(1)
+        ;
+    try (ClusterFixture cluster = builder.build();
+         ClientFixture client = cluster.clientFixture()) {
+
+      boolean hasAssertions = false;
+      assert hasAssertions = true;
+      assertFalse(hasAssertions);
+      String sql = "SELECT id_i, name_s10 FROM `mock`.`customers_10`";
+      client.queryBuilder().sql(sql).run();
+    }
+  }
+}

Reply via email to