http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java index 57f9432..f3c6820 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java @@ -29,7 +29,7 @@ public class SubQueryDiagnostic { static QBSubQueryRewrite getRewrite(QBSubQuery subQuery, TokenRewriteStream stream, Context ctx) { - if (ctx.getExplain()) { + if (ctx.isExplainSkipExecution()) { return new QBSubQueryRewrite(subQuery, stream); } else { return new QBSubQueryRewriteNoop(subQuery, stream);
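The hunk above replaces the old ctx.getExplain() check with the new ctx.isExplainSkipExecution(): the real QBSubQueryRewrite tracker is only kept for explains that do not execute the query, and with EXPLAIN ANALYZE the statement actually runs, so a single "is explain" flag no longer captures that distinction. Below is a minimal, hedged sketch of how compile-time code can branch on the two modes; isExplainSkipExecution() and getExplainAnalyze() are the Context accessors this commit uses, but the helper method itself is hypothetical and not part of the patch.

  // Illustration only -- hypothetical helper, not part of this commit.
  // isExplainSkipExecution() and getExplainAnalyze() are the Context
  // accessors referenced elsewhere in this patch.
  private static boolean collectRuntimeStats(org.apache.hadoop.hive.ql.Context ctx) {
    if (ctx.isExplainSkipExecution()) {
      return false;  // plain EXPLAIN: plan only, the query never runs
    }
    // EXPLAIN ANALYZE leaves a non-null marker on the Context; the query runs
    // and per-operator runtime row counts are gathered for the re-explained plan.
    return ctx.getExplainAnalyze() != null;
  }
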
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 114fa2f..fb5ca57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -28,6 +28,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Queue; import java.util.Set; +import java.util.Stack; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +42,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -52,6 +54,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; @@ -131,6 +134,7 @@ public abstract class TaskCompiler { } } } + return; } @@ -497,4 +501,5 @@ public abstract class TaskCompiler { clone.setMapJoinOps(pCtx.getMapJoinOps()); return clone; } + } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 66a8322..cd0b588 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -72,6 +72,7 @@ import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; +import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider; import org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider; @@ -390,8 +391,9 @@ public class TezCompiler extends TaskCompiler { } // we need to clone some operator plans and remove union operators still + int indexForTezUnion = 0; for (BaseWork w: procCtx.workWithUnionOperators) { - GenTezUtils.removeUnionOperators(procCtx, w); + GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++); } // then we make sure the file sink operators are set up right @@ -489,7 +491,8 @@ public class TezCompiler extends TaskCompiler { LOG.debug("Skipping cross product analysis"); } - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && 
ctx.getExplainAnalyze() == null) { physicalCtx = new Vectorizer().resolve(physicalCtx); } else { LOG.debug("Skipping vectorization"); @@ -517,6 +520,11 @@ public class TezCompiler extends TaskCompiler { // the backend. If you have a physical optimization that changes // table scans or filters, you have to invoke it before this one. physicalCtx = new SerializeFilter().resolve(physicalCtx); + + if (physicalCtx.getContext().getExplainAnalyze() != null) { + new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan"); return; } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 33fbffe..34d83ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -260,6 +260,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { // references. HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); rewrittenCtx = new Context(conf); + rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); } catch (IOException e) { throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg()); } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index 08278de..baf77c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; import org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; +import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; @@ -429,7 +430,8 @@ public class SparkCompiler extends TaskCompiler { LOG.debug("Skipping cross product analysis"); } - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && ctx.getExplainAnalyze() == null) { (new Vectorizer()).resolve(physicalCtx); } else { LOG.debug("Skipping vectorization"); @@ -443,6 +445,10 @@ public class SparkCompiler extends TaskCompiler { new CombineEquivalentWorkResolver().resolve(physicalCtx); + if (physicalCtx.getContext().getExplainAnalyze() != null) { + new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx); + } + PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE); return; } 
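TezCompiler and SparkCompiler now apply the same two guards while optimizing the task plan: vectorization only runs when EXPLAIN ANALYZE is not active, and AnnotateRunTimeStatsOptimizer is invoked when it is, so operators get annotated for runtime-stats collection (see the runtimeStatsTmpDir field added to OperatorDesc below). A condensed, hedged sketch of that shared pattern follows; the wrapping method is hypothetical, while the resolver and accessor calls are taken from the hunks above.

  // Condensed sketch of the pattern both task compilers now share.
  // The wrapper method is hypothetical; Vectorizer, AnnotateRunTimeStatsOptimizer
  // and Context.getExplainAnalyze() are used as in the hunks above.
  private PhysicalContext optimizeForExplainAnalyze(PhysicalContext physicalCtx, HiveConf conf)
      throws SemanticException {
    boolean explainAnalyze = physicalCtx.getContext().getExplainAnalyze() != null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && !explainAnalyze) {
      // Vectorization is skipped while runtime stats are being collected.
      physicalCtx = new Vectorizer().resolve(physicalCtx);
    }
    if (explainAnalyze) {
      // Annotates operators so their runtime row counts can be collected
      // (cf. the runtimeStatsTmpDir field added to OperatorDesc below).
      new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }
    return physicalCtx;
  }
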
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index adec5c7..e217bdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -21,8 +21,10 @@ package org.apache.hadoop.hive.ql.plan; import java.util.Map; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.PTFUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; public class AbstractOperatorDesc implements OperatorDesc { @@ -31,6 +33,7 @@ public class AbstractOperatorDesc implements OperatorDesc { protected transient OpTraits opTraits; protected transient Map<String, String> opProps; protected long memNeeded = 0; + protected String runtimeStatsTmpDir; @Override @Explain(skipHeader = true, displayName = "Statistics") @@ -89,4 +92,13 @@ public class AbstractOperatorDesc implements OperatorDesc { public void setMemoryNeeded(long memNeeded) { this.memNeeded = memNeeded; } + + public String getRuntimeStatsTmpDir() { + return runtimeStatsTmpDir; + } + + public void setRuntimeStatsTmpDir(String runtimeStatsTmpDir) { + this.runtimeStatsTmpDir = runtimeStatsTmpDir; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index a213c83..9f4767c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.parse.ParseContext; /** @@ -42,15 +43,10 @@ public class ExplainWork implements Serializable { private HashSet<ReadEntity> inputs; private ParseContext pCtx; - boolean extended; - boolean formatted; - boolean dependency; - boolean logical; + private ExplainConfiguration config; boolean appendTaskType; - boolean authorize; - boolean userLevelExplain; String cboInfo; private transient BaseSemanticAnalyzer analyzer; @@ -63,12 +59,7 @@ public class ExplainWork implements Serializable { List<Task<? extends Serializable>> rootTasks, Task<? extends Serializable> fetchTask, BaseSemanticAnalyzer analyzer, - boolean extended, - boolean formatted, - boolean dependency, - boolean logical, - boolean authorize, - boolean userLevelExplain, + ExplainConfiguration config, String cboInfo) { this.resFile = resFile; this.rootTasks = new ArrayList<Task<? 
extends Serializable>>(rootTasks); @@ -77,14 +68,9 @@ public class ExplainWork implements Serializable { if (analyzer != null) { this.inputs = analyzer.getInputs(); } - this.extended = extended; - this.formatted = formatted; - this.dependency = dependency; - this.logical = logical; this.pCtx = pCtx; - this.authorize = authorize; - this.userLevelExplain = userLevelExplain; this.cboInfo = cboInfo; + this.config = config; } public Path getResFile() { @@ -120,27 +106,15 @@ public class ExplainWork implements Serializable { } public boolean getExtended() { - return extended; - } - - public void setExtended(boolean extended) { - this.extended = extended; + return config.isExtended(); } public boolean getDependency() { - return dependency; - } - - public void setDependency(boolean dependency) { - this.dependency = dependency; + return config.isDependency(); } public boolean isFormatted() { - return formatted; - } - - public void setFormatted(boolean formatted) { - this.formatted = formatted; + return config.isFormatted(); } public ParseContext getParseContext() { @@ -152,11 +126,7 @@ public class ExplainWork implements Serializable { } public boolean isLogical() { - return logical; - } - - public void setLogical(boolean logical) { - this.logical = logical; + return config.isLogical(); } public boolean isAppendTaskType() { @@ -168,11 +138,7 @@ public class ExplainWork implements Serializable { } public boolean isAuthorize() { - return authorize; - } - - public void setAuthorize(boolean authorize) { - this.authorize = authorize; + return config.isAuthorize(); } public BaseSemanticAnalyzer getAnalyzer() { @@ -180,11 +146,7 @@ public class ExplainWork implements Serializable { } public boolean isUserLevelExplain() { - return userLevelExplain; - } - - public void setUserLevelExplain(boolean userLevelExplain) { - this.userLevelExplain = userLevelExplain; + return config.isUserLevelExplain(); } public String getCboInfo() { @@ -195,4 +157,12 @@ public class ExplainWork implements Serializable { this.cboInfo = cboInfo; } + public ExplainConfiguration getConfig() { + return config; + } + + public void setConfig(ExplainConfiguration config) { + this.config = config; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index ce0e0a8..07ed4fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -61,7 +61,6 @@ public class FileSinkDesc extends AbstractOperatorDesc { private DynamicPartitionCtx dpCtx; private String staticSpec; // static partition spec ends with a '/' private boolean gatherStats; - private int indexInTezUnion = -1; // Consider a query like: // insert overwrite table T3 select ... 
from T1 join T2 on T1.key = T2.key; @@ -475,12 +474,4 @@ public class FileSinkDesc extends AbstractOperatorDesc { this.statsTmpDir = statsCollectionTempDir; } - public int getIndexInTezUnion() { - return indexInTezUnion; - } - - public void setIndexInTezUnion(int indexInTezUnion) { - this.indexInTezUnion = indexInTezUnion; - } - } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index a5527dc..20cd56f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -48,7 +49,12 @@ public class MergeJoinWork extends BaseWork { @Override public Set<Operator<?>> getAllRootOperators() { - return getMainWork().getAllRootOperators(); + Set<Operator<?>> set = new HashSet<>(); + set.addAll(getMainWork().getAllRootOperators()); + for (BaseWork w : mergeWorkList) { + set.addAll(w.getAllRootOperators()); + } + return set; } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java index 16be499..ad620c2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java @@ -30,4 +30,6 @@ public interface OperatorDesc extends Serializable, Cloneable { public Map<String, String> getOpProps(); public long getMemoryNeeded(); public void setMemoryNeeded(long memoryNeeded); + public String getRuntimeStatsTmpDir(); + public void setRuntimeStatsTmpDir(String runtimeStatsTmpDir); } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 029043f..c46ea70 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -40,18 +40,20 @@ public class Statistics implements Serializable { } private long numRows; + private long runTimeNumRows; private long dataSize; private State basicStatsState; private Map<String, ColStatistics> columnStats; private State columnStatsState; public Statistics() { - this(0, 0); + this(0, 0, -1); } - public Statistics(long nr, long ds) { + public Statistics(long nr, long ds, long rnr) { this.setNumRows(nr); this.setDataSize(ds); + this.setRunTimeNumRows(rnr); this.basicStatsState = State.NONE; this.columnStats = null; this.columnStatsState = State.NONE; @@ -107,6 +109,9 @@ public class Statistics implements Serializable { StringBuilder sb = new StringBuilder(); sb.append("Num rows: "); sb.append(numRows); + if (runTimeNumRows >= 0) { + sb.append("/" + runTimeNumRows); + } sb.append(" Data size: "); sb.append(dataSize); sb.append(" Basic stats: "); @@ -121,6 
+126,9 @@ public class Statistics implements Serializable { StringBuilder sb = new StringBuilder(); sb.append("rows="); sb.append(numRows); + if (runTimeNumRows >= 0) { + sb.append("/" + runTimeNumRows); + } sb.append(" width="); // just to be safe about numRows if (numRows != 0) { @@ -148,7 +156,7 @@ public class Statistics implements Serializable { @Override public Statistics clone() throws CloneNotSupportedException { - Statistics clone = new Statistics(numRows, dataSize); + Statistics clone = new Statistics(numRows, dataSize, runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); if (columnStats != null) { @@ -263,4 +271,12 @@ public class Statistics implements Serializable { } return null; } + + public long getRunTimeNumRows() { + return runTimeNumRows; + } + + public void setRunTimeNumRows(long runTimeNumRows) { + this.runTimeNumRows = runTimeNumRows; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java index 990d80c..805bc5b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java @@ -27,6 +27,7 @@ import java.util.Map; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -132,6 +133,7 @@ public class TestExplainTask { pCtx.setTopOps(topOps); work.setParseContext(pCtx); ByteArrayOutputStream baos = new ByteArrayOutputStream(); + work.setConfig(new ExplainConfiguration()); new ExplainTask().getJSONLogicalPlan(new PrintStream(baos), work); baos.close(); return baos.toString(); http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java index ae1747d..d6fe540 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java @@ -297,8 +297,10 @@ public class TestUpdateDeleteSemanticAnalyzer { Path tmp = new Path(f.getPath()); fs.create(tmp); fs.deleteOnExit(tmp); + ExplainConfiguration config = new ExplainConfiguration(); + config.setExtended(true); ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(), - sem.getFetchTask(), sem, true, false, false, false, false, false, null); + sem.getFetchTask(), sem, config, null); ExplainTask task = new ExplainTask(); task.setWork(work); task.initialize(queryState, plan, null, null); http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_1.q b/ql/src/test/queries/clientpositive/explainanalyze_1.q new file mode 100644 index 
0000000..a4b3dc5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_1.q @@ -0,0 +1,38 @@ +set hive.mapred.mode=nonstrict; + +explain analyze select * from src a union all select * from src b limit 10; + +explain analyze select key from src; + +explain analyze create table t as select key from src; + +create table t as select key from src; + +explain analyze insert overwrite table t select key from src; + +explain analyze select key from src limit 10; + +explain analyze select key from src where value < 10; + +explain analyze select key from src where key < 10; +select count(*) from (select key from src where key < 10)subq; + +explain analyze select key, count(key) from src group by key; +select count(*) from (select key, count(key) from src group by key)subq; + +explain analyze select count(*) from src a join src b on a.key = b.value where a.key > 0; + +explain analyze select count(*) from src a join src b on a.key = b.key where a.key > 0; +select count(*) from src a join src b on a.key = b.key where a.key > 0; + + +explain analyze select * from src a union all select * from src b; +select count(*) from (select * from src a union all select * from src b)subq; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +EXPLAIN analyze +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key); http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_2.q b/ql/src/test/queries/clientpositive/explainanalyze_2.q new file mode 100644 index 0000000..dfee826 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_2.q @@ -0,0 +1,329 @@ +set hive.explain.user=true; +set hive.metastore.aggregate.stats.cache.enabled=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +CREATE TABLE ss(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +CREATE TABLE sr(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +CREATE TABLE cs(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +INSERT OVERWRITE TABLE ss +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +INSERT OVERWRITE TABLE sr +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=12); + +INSERT OVERWRITE TABLE cs +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08'); + + +ANALYZE TABLE ss COMPUTE STATISTICS; +ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +ANALYZE TABLE sr COMPUTE STATISTICS; +ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +ANALYZE TABLE cs COMPUTE STATISTICS; +ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +set hive.auto.convert.join=false; + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +explain analyze +select 
+ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100; + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.stats.fetch.column.stats=false; + + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +explain analyze +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100; + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set 
hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + + + +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab2 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.sortmerge.join = true; + +set hive.auto.convert.join.noconditionaltask.size=500; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value; + +explain analyze +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key); + +explain analyze +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value 
from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + +CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE b(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE c(key STRING, value STRING) STORED AS TEXTFILE; + +explain analyze +from +( +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value; + +explain analyze +FROM +( +SELECT x.key as key, y.value as value from src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value; + + +CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE; + +explain analyze +FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION DISTINCT + select s2.key as key, s2.value as value from src s2) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value; + +explain analyze FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key; + +set hive.entity.capture.transform=true; + +explain analyze +SELECT +TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) +FROM src a join src b +on a.key = b.key; + +explain analyze +FROM ( + select key, value from ( + select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1 + UNION all + select 
s2.key as key, s2.value as value, 'tst1' as value2 from src s2) unionsub + UNION all + select key, value from src s0 + ) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) +GROUP BY unionsrc.key, unionsrc.value; + +explain analyze +FROM ( + select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1 + UNION all + select s2.key as key, s2.value as value, 'tst1' as value2 from src s2 + ) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) +GROUP BY unionsrc.key, unionsrc.value; http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_3.q b/ql/src/test/queries/clientpositive/explainanalyze_3.q new file mode 100644 index 0000000..69f82e5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_3.q @@ -0,0 +1,158 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; +set hive.metastore.filter.hook=org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl; +set hive.mapred.mode=nonstrict; +set hive.explain.user=true; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=true; + +explain analyze select key, value +FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol; + +explain analyze show tables; + +explain analyze create database newDB location "/tmp/"; + +create database newDB location "/tmp/"; + +explain analyze describe database extended newDB; + +describe database extended newDB; + +explain analyze use newDB; + +use newDB; + +create table tab (name string); + +explain analyze alter table tab rename to newName; + +explain analyze drop table tab; + +drop table tab; + +explain analyze use default; + +use default; + +drop database newDB; + +explain analyze analyze table src compute statistics; + +explain analyze analyze table src compute statistics for columns; + +explain analyze +CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)); + +CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)); + +explain analyze SELECT SIGMOID(2) FROM src LIMIT 1; +explain analyze DROP TEMPORARY MACRO SIGMOID; +DROP TEMPORARY MACRO SIGMOID; + +explain analyze create table src_autho_test as select * from src; +create table src_autho_test as select * from src; + +set hive.security.authorization.enabled=true; + +explain analyze grant select on table src_autho_test to user hive_test_user; +grant select on table src_autho_test to user hive_test_user; + +explain analyze show grant user hive_test_user on table src_autho_test; +explain analyze show grant user hive_test_user on table src_autho_test(key); + +select key from src_autho_test order by key limit 20; + +explain analyze revoke select on table src_autho_test from user hive_test_user; + +explain analyze grant select(key) on table src_autho_test to user hive_test_user; + +explain analyze revoke select(key) on table src_autho_test from user 
hive_test_user; + +explain analyze +create role sRc_roLE; + +create role sRc_roLE; + +explain analyze +grant role sRc_roLE to user hive_test_user; + +grant role sRc_roLE to user hive_test_user; + +explain analyze show role grant user hive_test_user; + +explain analyze drop role sRc_roLE; +drop role sRc_roLE; + +set hive.security.authorization.enabled=false; +drop table src_autho_test; + +explain analyze drop view v; + +explain analyze create view v as with cte as (select * from src order by key limit 5) +select * from cte; + +explain analyze with cte as (select * from src order by key limit 5) +select * from cte; + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=50000; +SET hive.optimize.index.filter=true; +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set hive.compute.splits.in.am=true; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=50000; + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + +explain analyze insert overwrite table orc_merge5 select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; + +drop table orc_merge5; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + + + +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = true; +explain analyze +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key; + + + http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_4.q ---------------------------------------------------------------------- diff --git 
a/ql/src/test/queries/clientpositive/explainanalyze_4.q b/ql/src/test/queries/clientpositive/explainanalyze_4.q new file mode 100644 index 0000000..dad397b --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_4.q @@ -0,0 +1,103 @@ +set hive.mapred.mode=nonstrict; + +set hive.explain.user=true; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +-- First try with regular mergejoin +explain analyze +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain analyze +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain analyze +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=200000; +set hive.stats.fetch.column.stats=false; +set hive.exec.reducers.bytes.per.reducer=200000; + +-- Try with dynamically partitioned hashjoin +explain analyze +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain analyze +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain analyze +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_5.q b/ql/src/test/queries/clientpositive/explainanalyze_5.q new file mode 100644 index 0000000..bb23e45 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_5.q @@ -0,0 +1,81 @@ +set hive.stats.column.autogather=true; + +explain analyze analyze table src compute statistics; + +explain analyze analyze table src compute statistics for columns; + +drop table src_multi2; + +create table src_multi2 like src; + +explain 
analyze insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +select count(*) from (select * from src union select * from src1)subq; + +insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +describe formatted src_multi2; + + +set hive.mapred.mode=nonstrict; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +-- SORT_QUERY_RESULTS + +create table acid_uami(i int, + de decimal(5,2), + vc varchar(128)) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); + +insert into table acid_uami values + (1, 109.23, 'mary had a little lamb'), + (6553, 923.19, 'its fleece was white as snow'); + +insert into table acid_uami values + (10, 119.23, 'and everywhere that mary went'), + (65530, 823.19, 'the lamb was sure to go'); + +select * from acid_uami order by de; + +explain analyze update acid_uami set de = 3.14 where de = 109.23 or de = 119.23; + +select * from acid_uami order by de; + +update acid_uami set de = 3.14 where de = 109.23 or de = 119.23; + +select * from acid_uami order by de; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table; +dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/00000_0; + +create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc location '${system:test.tmp.dir}/delete_orig_table' TBLPROPERTIES ('transactional'='true'); + +select count(*) from acid_dot; + +explain analyze delete from acid_dot where cint < -1070551679; + +select count(*) from acid_dot; + +delete from acid_dot where cint < -1070551679; + +select count(*) from acid_dot; + +dfs -rmr ${system:test.tmp.dir}/delete_orig_table; http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_partlvl.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index f6f2bfa..7e2edd9 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -46,18 +46,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16) keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 2000.0 (type: double) sort order: + Map-reduce partition columns: 2000.0 (type: double) + Statistics: Num rows: 26 Data size: 105 
Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) Reduce Operator Tree: Group By Operator @@ -65,11 +69,14 @@ STAGE PLANS: keys: 2000.0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -98,20 +105,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16) keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 2000.0 (type: double) null sort order: a sort order: + Map-reduce partition columns: 2000.0 (type: double) + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) auto parallelism: false @@ -173,14 +184,17 @@ STAGE PLANS: keys: 2000.0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -232,18 +246,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16) keys: 4000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 4000.0 (type: double) sort order: + Map-reduce partition columns: 4000.0 (type: double) + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -251,11 +269,14 @@ STAGE PLANS:
          keys: 4000.0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -284,20 +305,24 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: employeeid (type: int)
              outputColumnNames: employeeid
+              Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16)
                keys: 4000.0 (type: double)
                mode: hash
                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 4000.0 (type: double)
                  null sort order: a
                  sort order: +
                  Map-reduce partition columns: 4000.0 (type: double)
+                  Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                  tag: -1
                  value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
                  auto parallelism: false
@@ -359,14 +384,17 @@ STAGE PLANS:
          keys: 4000.0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
+              Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -418,18 +446,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeeid (type: int), employeename (type: string)
              outputColumnNames: employeeid, employeename
+              Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                keys: 2000.0 (type: double)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 2000.0 (type: double)
                  sort order: +
                  Map-reduce partition columns: 2000.0 (type: double)
+                  Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -437,11 +469,14 @@ STAGE PLANS:
          keys: 2000.0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -498,18 +533,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeesalary (type: double), employeeid (type: int), employeename (type: string)
              outputColumnNames: employeesalary, employeeid, employeename
+              Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                keys: employeesalary (type: double)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: double)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: double)
+                  Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -517,11 +556,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -580,23 +622,29 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeeid (type: int), employeename (type: string)
              outputColumnNames: employeeid, employeename
+              Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                mode: hash
                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 21089e1..47fffab 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -84,18 +84,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: country (type: string), employeename (type: string), employeeid (type: int)
              outputColumnNames: country, employeename, employeeid
+              Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16)
                keys: 4000.0 (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 4000.0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -103,11 +107,14 @@ STAGE PLANS:
          keys: 4000.0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -157,18 +164,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: country (type: string), employeeid (type: int)
              outputColumnNames: country, employeeid
+              Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16)
                keys: 2000.0 (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 2000.0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -176,11 +187,14 @@ STAGE PLANS:
          keys: 2000.0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -241,18 +255,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeesalary (type: double), country (type: string), employeeid (type: int)
              outputColumnNames: employeesalary, country, employeeid
+              Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16)
                keys: employeesalary (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -260,11 +278,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -322,18 +343,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: employeesalary (type: double), country (type: string), employeeid (type: int), employeename (type: string)
              outputColumnNames: employeesalary, country, employeeid, employeename
+              Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                keys: employeesalary (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -341,11 +366,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_quoting.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
index 288e61b..52e3538 100644
--- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
@@ -24,23 +24,29 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: user_web_events
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: user id (type: bigint), user name (type: string)
              outputColumnNames: user id, user name
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(user id, 16), compute_stats(user name, 16)
                mode: hash
                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -75,23 +81,29 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: user_web_events
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: user id (type: bigint)
              outputColumnNames: user id
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(user id, 16)
                mode: hash
                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0)
          mode: mergepartial
          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
