http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java index 57f9432..f3c6820 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java @@ -29,7 +29,7 @@ public class SubQueryDiagnostic { static QBSubQueryRewrite getRewrite(QBSubQuery subQuery, TokenRewriteStream stream, Context ctx) { - if (ctx.getExplain()) { + if (ctx.isExplainSkipExecution()) { return new QBSubQueryRewrite(subQuery, stream); } else { return new QBSubQueryRewriteNoop(subQuery, stream);
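The hunk above replaces the old ctx.getExplain() check with the new ctx.isExplainSkipExecution(): the real QBSubQueryRewrite tracker is only kept for explains that do not execute the query, and with EXPLAIN ANALYZE the statement actually runs, so a single "is explain" flag no longer captures that distinction. Below is a minimal, hedged sketch of how compile-time code can branch on the two modes; isExplainSkipExecution() and getExplainAnalyze() are the Context accessors this commit uses, but the helper method itself is hypothetical and not part of the patch.

  // Illustration only -- hypothetical helper, not part of this commit.
  // isExplainSkipExecution() and getExplainAnalyze() are the Context
  // accessors referenced elsewhere in this patch.
  private static boolean collectRuntimeStats(org.apache.hadoop.hive.ql.Context ctx) {
    if (ctx.isExplainSkipExecution()) {
      return false;  // plain EXPLAIN: plan only, the query never runs
    }
    // EXPLAIN ANALYZE leaves a non-null marker on the Context; the query runs
    // and per-operator runtime row counts are gathered for the re-explained plan.
    return ctx.getExplainAnalyze() != null;
  }
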
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 114fa2f..fb5ca57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -28,6 +28,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Queue; import java.util.Set; +import java.util.Stack; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +42,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; @@ -52,6 +54,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; +import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; @@ -131,6 +134,7 @@ public abstract class TaskCompiler { } } } + return; } @@ -497,4 +501,5 @@ public abstract class TaskCompiler { clone.setMapJoinOps(pCtx.getMapJoinOps()); return clone; } + } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 66a8322..cd0b588 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -72,6 +72,7 @@ import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; +import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck; import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider; import org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider; @@ -390,8 +391,9 @@ public class TezCompiler extends TaskCompiler { } // we need to clone some operator plans and remove union operators still + int indexForTezUnion = 0; for (BaseWork w: procCtx.workWithUnionOperators) { - GenTezUtils.removeUnionOperators(procCtx, w); + GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++); } // then we make sure the file sink operators are set up right @@ -489,7 +491,8 @@ public class TezCompiler extends TaskCompiler { LOG.debug("Skipping cross product analysis"); } - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && 
ctx.getExplainAnalyze() == null) { physicalCtx = new Vectorizer().resolve(physicalCtx); } else { LOG.debug("Skipping vectorization"); @@ -517,6 +520,11 @@ public class TezCompiler extends TaskCompiler { // the backend. If you have a physical optimization that changes // table scans or filters, you have to invoke it before this one. physicalCtx = new SerializeFilter().resolve(physicalCtx); + + if (physicalCtx.getContext().getExplainAnalyze() != null) { + new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan"); return; } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 33fbffe..34d83ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -260,6 +260,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { // references. HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); rewrittenCtx = new Context(conf); + rewrittenCtx.setExplainConfig(ctx.getExplainConfig()); } catch (IOException e) { throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg()); } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index 08278de..baf77c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; import org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; +import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; @@ -429,7 +430,8 @@ public class SparkCompiler extends TaskCompiler { LOG.debug("Skipping cross product analysis"); } - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && ctx.getExplainAnalyze() == null) { (new Vectorizer()).resolve(physicalCtx); } else { LOG.debug("Skipping vectorization"); @@ -443,6 +445,10 @@ public class SparkCompiler extends TaskCompiler { new CombineEquivalentWorkResolver().resolve(physicalCtx); + if (physicalCtx.getContext().getExplainAnalyze() != null) { + new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx); + } + PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE); return; } 
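TezCompiler and SparkCompiler now apply the same two guards while optimizing the task plan: vectorization only runs when EXPLAIN ANALYZE is not active, and AnnotateRunTimeStatsOptimizer is invoked when it is, so operators get annotated for runtime-stats collection (see the runtimeStatsTmpDir field added to OperatorDesc below). A condensed, hedged sketch of that shared pattern follows; the wrapping method is hypothetical, while the resolver and accessor calls are taken from the hunks above.

  // Condensed sketch of the pattern both task compilers now share.
  // The wrapper method is hypothetical; Vectorizer, AnnotateRunTimeStatsOptimizer
  // and Context.getExplainAnalyze() are used as in the hunks above.
  private PhysicalContext optimizeForExplainAnalyze(PhysicalContext physicalCtx, HiveConf conf)
      throws SemanticException {
    boolean explainAnalyze = physicalCtx.getContext().getExplainAnalyze() != null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && !explainAnalyze) {
      // Vectorization is skipped while runtime stats are being collected.
      physicalCtx = new Vectorizer().resolve(physicalCtx);
    }
    if (explainAnalyze) {
      // Annotates operators so their runtime row counts can be collected
      // (cf. the runtimeStatsTmpDir field added to OperatorDesc below).
      new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }
    return physicalCtx;
  }
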
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index adec5c7..e217bdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -21,8 +21,10 @@ package org.apache.hadoop.hive.ql.plan; import java.util.Map; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.PTFUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; public class AbstractOperatorDesc implements OperatorDesc { @@ -31,6 +33,7 @@ public class AbstractOperatorDesc implements OperatorDesc { protected transient OpTraits opTraits; protected transient Map<String, String> opProps; protected long memNeeded = 0; + protected String runtimeStatsTmpDir; @Override @Explain(skipHeader = true, displayName = "Statistics") @@ -89,4 +92,13 @@ public class AbstractOperatorDesc implements OperatorDesc { public void setMemoryNeeded(long memNeeded) { this.memNeeded = memNeeded; } + + public String getRuntimeStatsTmpDir() { + return runtimeStatsTmpDir; + } + + public void setRuntimeStatsTmpDir(String runtimeStatsTmpDir) { + this.runtimeStatsTmpDir = runtimeStatsTmpDir; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index a213c83..9f4767c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.parse.ParseContext; /** @@ -42,15 +43,10 @@ public class ExplainWork implements Serializable { private HashSet<ReadEntity> inputs; private ParseContext pCtx; - boolean extended; - boolean formatted; - boolean dependency; - boolean logical; + private ExplainConfiguration config; boolean appendTaskType; - boolean authorize; - boolean userLevelExplain; String cboInfo; private transient BaseSemanticAnalyzer analyzer; @@ -63,12 +59,7 @@ public class ExplainWork implements Serializable { List<Task<? extends Serializable>> rootTasks, Task<? extends Serializable> fetchTask, BaseSemanticAnalyzer analyzer, - boolean extended, - boolean formatted, - boolean dependency, - boolean logical, - boolean authorize, - boolean userLevelExplain, + ExplainConfiguration config, String cboInfo) { this.resFile = resFile; this.rootTasks = new ArrayList<Task<? 
extends Serializable>>(rootTasks); @@ -77,14 +68,9 @@ public class ExplainWork implements Serializable { if (analyzer != null) { this.inputs = analyzer.getInputs(); } - this.extended = extended; - this.formatted = formatted; - this.dependency = dependency; - this.logical = logical; this.pCtx = pCtx; - this.authorize = authorize; - this.userLevelExplain = userLevelExplain; this.cboInfo = cboInfo; + this.config = config; } public Path getResFile() { @@ -120,27 +106,15 @@ public class ExplainWork implements Serializable { } public boolean getExtended() { - return extended; - } - - public void setExtended(boolean extended) { - this.extended = extended; + return config.isExtended(); } public boolean getDependency() { - return dependency; - } - - public void setDependency(boolean dependency) { - this.dependency = dependency; + return config.isDependency(); } public boolean isFormatted() { - return formatted; - } - - public void setFormatted(boolean formatted) { - this.formatted = formatted; + return config.isFormatted(); } public ParseContext getParseContext() { @@ -152,11 +126,7 @@ public class ExplainWork implements Serializable { } public boolean isLogical() { - return logical; - } - - public void setLogical(boolean logical) { - this.logical = logical; + return config.isLogical(); } public boolean isAppendTaskType() { @@ -168,11 +138,7 @@ public class ExplainWork implements Serializable { } public boolean isAuthorize() { - return authorize; - } - - public void setAuthorize(boolean authorize) { - this.authorize = authorize; + return config.isAuthorize(); } public BaseSemanticAnalyzer getAnalyzer() { @@ -180,11 +146,7 @@ public class ExplainWork implements Serializable { } public boolean isUserLevelExplain() { - return userLevelExplain; - } - - public void setUserLevelExplain(boolean userLevelExplain) { - this.userLevelExplain = userLevelExplain; + return config.isUserLevelExplain(); } public String getCboInfo() { @@ -195,4 +157,12 @@ public class ExplainWork implements Serializable { this.cboInfo = cboInfo; } + public ExplainConfiguration getConfig() { + return config; + } + + public void setConfig(ExplainConfiguration config) { + this.config = config; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index ce0e0a8..07ed4fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -61,7 +61,6 @@ public class FileSinkDesc extends AbstractOperatorDesc { private DynamicPartitionCtx dpCtx; private String staticSpec; // static partition spec ends with a '/' private boolean gatherStats; - private int indexInTezUnion = -1; // Consider a query like: // insert overwrite table T3 select ... 
from T1 join T2 on T1.key = T2.key; @@ -475,12 +474,4 @@ public class FileSinkDesc extends AbstractOperatorDesc { this.statsTmpDir = statsCollectionTempDir; } - public int getIndexInTezUnion() { - return indexInTezUnion; - } - - public void setIndexInTezUnion(int indexInTezUnion) { - this.indexInTezUnion = indexInTezUnion; - } - } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index a5527dc..20cd56f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -48,7 +49,12 @@ public class MergeJoinWork extends BaseWork { @Override public Set<Operator<?>> getAllRootOperators() { - return getMainWork().getAllRootOperators(); + Set<Operator<?>> set = new HashSet<>(); + set.addAll(getMainWork().getAllRootOperators()); + for (BaseWork w : mergeWorkList) { + set.addAll(w.getAllRootOperators()); + } + return set; } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java index 16be499..ad620c2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java @@ -30,4 +30,6 @@ public interface OperatorDesc extends Serializable, Cloneable { public Map<String, String> getOpProps(); public long getMemoryNeeded(); public void setMemoryNeeded(long memoryNeeded); + public String getRuntimeStatsTmpDir(); + public void setRuntimeStatsTmpDir(String runtimeStatsTmpDir); } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 029043f..c46ea70 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -40,18 +40,20 @@ public class Statistics implements Serializable { } private long numRows; + private long runTimeNumRows; private long dataSize; private State basicStatsState; private Map<String, ColStatistics> columnStats; private State columnStatsState; public Statistics() { - this(0, 0); + this(0, 0, -1); } - public Statistics(long nr, long ds) { + public Statistics(long nr, long ds, long rnr) { this.setNumRows(nr); this.setDataSize(ds); + this.setRunTimeNumRows(rnr); this.basicStatsState = State.NONE; this.columnStats = null; this.columnStatsState = State.NONE; @@ -107,6 +109,9 @@ public class Statistics implements Serializable { StringBuilder sb = new StringBuilder(); sb.append("Num rows: "); sb.append(numRows); + if (runTimeNumRows >= 0) { + sb.append("/" + runTimeNumRows); + } sb.append(" Data size: "); sb.append(dataSize); sb.append(" Basic stats: "); @@ -121,6 
+126,9 @@ public class Statistics implements Serializable { StringBuilder sb = new StringBuilder(); sb.append("rows="); sb.append(numRows); + if (runTimeNumRows >= 0) { + sb.append("/" + runTimeNumRows); + } sb.append(" width="); // just to be safe about numRows if (numRows != 0) { @@ -148,7 +156,7 @@ public class Statistics implements Serializable { @Override public Statistics clone() throws CloneNotSupportedException { - Statistics clone = new Statistics(numRows, dataSize); + Statistics clone = new Statistics(numRows, dataSize, runTimeNumRows); clone.setBasicStatsState(basicStatsState); clone.setColumnStatsState(columnStatsState); if (columnStats != null) { @@ -263,4 +271,12 @@ public class Statistics implements Serializable { } return null; } + + public long getRunTimeNumRows() { + return runTimeNumRows; + } + + public void setRunTimeNumRows(long runTimeNumRows) { + this.runTimeNumRows = runTimeNumRows; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java index 990d80c..805bc5b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java @@ -27,6 +27,7 @@ import java.util.Map; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -132,6 +133,7 @@ public class TestExplainTask { pCtx.setTopOps(topOps); work.setParseContext(pCtx); ByteArrayOutputStream baos = new ByteArrayOutputStream(); + work.setConfig(new ExplainConfiguration()); new ExplainTask().getJSONLogicalPlan(new PrintStream(baos), work); baos.close(); return baos.toString(); http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java index ae1747d..d6fe540 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java @@ -297,8 +297,10 @@ public class TestUpdateDeleteSemanticAnalyzer { Path tmp = new Path(f.getPath()); fs.create(tmp); fs.deleteOnExit(tmp); + ExplainConfiguration config = new ExplainConfiguration(); + config.setExtended(true); ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(), - sem.getFetchTask(), sem, true, false, false, false, false, false, null); + sem.getFetchTask(), sem, config, null); ExplainTask task = new ExplainTask(); task.setWork(work); task.initialize(queryState, plan, null, null); http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_1.q b/ql/src/test/queries/clientpositive/explainanalyze_1.q new file mode 100644 index 
0000000..a4b3dc5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_1.q @@ -0,0 +1,38 @@ +set hive.mapred.mode=nonstrict; + +explain analyze select * from src a union all select * from src b limit 10; + +explain analyze select key from src; + +explain analyze create table t as select key from src; + +create table t as select key from src; + +explain analyze insert overwrite table t select key from src; + +explain analyze select key from src limit 10; + +explain analyze select key from src where value < 10; + +explain analyze select key from src where key < 10; +select count(*) from (select key from src where key < 10)subq; + +explain analyze select key, count(key) from src group by key; +select count(*) from (select key, count(key) from src group by key)subq; + +explain analyze select count(*) from src a join src b on a.key = b.value where a.key > 0; + +explain analyze select count(*) from src a join src b on a.key = b.key where a.key > 0; +select count(*) from src a join src b on a.key = b.key where a.key > 0; + + +explain analyze select * from src a union all select * from src b; +select count(*) from (select * from src a union all select * from src b)subq; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +EXPLAIN analyze +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key); http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_2.q b/ql/src/test/queries/clientpositive/explainanalyze_2.q new file mode 100644 index 0000000..dfee826 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_2.q @@ -0,0 +1,329 @@ +set hive.explain.user=true; +set hive.metastore.aggregate.stats.cache.enabled=false; + +-- SORT_QUERY_RESULTS + +CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; + +CREATE TABLE ss(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +CREATE TABLE sr(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +CREATE TABLE cs(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE; + +INSERT OVERWRITE TABLE ss +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +INSERT OVERWRITE TABLE sr +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=12); + +INSERT OVERWRITE TABLE cs +SELECT x.key,x.value,y.key,y.value,z.key,z.value +FROM src1 x +JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08'); + + +ANALYZE TABLE ss COMPUTE STATISTICS; +ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +ANALYZE TABLE sr COMPUTE STATISTICS; +ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +ANALYZE TABLE cs COMPUTE STATISTICS; +ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3; + +set hive.auto.convert.join=false; + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +explain analyze +select 
+ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100; + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.stats.fetch.column.stats=false; + + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11); + +explain analyze +select +ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3) +FROM +ss,sr,cs,src d1,src d2,src d3,src1,srcpart +where + ss.k1 = d1.key +and sr.k1 = d2.key +and cs.k1 = d3.key +and ss.k2 = sr.k2 +and ss.k3 = sr.k3 +and ss.v1 = src1.value +and ss.v2 = srcpart.value +and sr.v2 = cs.v2 +and sr.v3 = cs.v3 +and ss.v3='ssv3' +and sr.v1='srv1' +and src1.key = 'src1key' +and srcpart.key = 'srcpartkey' +and d1.value = 'd1value' +and d2.value in ('2000Q1','2000Q2','2000Q3') +and d3.value in ('2000Q1','2000Q2','2000Q3') +group by +ss.k1,sr.k2,cs.k3 +order by +ss.k1,sr.k2,cs.k3 +limit 100; + +explain analyze +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, z.value, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set 
hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + + + +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab2 partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = false; +set hive.auto.convert.sortmerge.join = true; + +set hive.auto.convert.join.noconditionaltask.size=500; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key; + +explain analyze +select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value; + +explain analyze +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key); + +explain analyze +select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value +UNION ALL +select s2.key as key, s2.value as value from tab s2 +) a join tab_part b on (a.key = b.key); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value 
from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value); + +explain analyze +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value); + +CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE b(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE c(key STRING, value STRING) STORED AS TEXTFILE; + +explain analyze +from +( +SELECT x.key, y.value +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union all select * from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +union all +SELECT x.key, y.value +FROM src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value; + +explain analyze +FROM +( +SELECT x.key as key, y.value as value from src1 x JOIN src y ON (x.key = y.key) +JOIN (select * from src1 union select * from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src x JOIN src y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value) +union +SELECT x.key as key, y.value as value from src1 x JOIN src1 y ON (x.key = y.key) +JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value) +) tmp +INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value +INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value; + + +CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE; + +explain analyze +FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 + UNION DISTINCT + select s2.key as key, s2.value as value from src s2) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value; + +explain analyze FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key; + +set hive.entity.capture.transform=true; + +explain analyze +SELECT +TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) +FROM src a join src b +on a.key = b.key; + +explain analyze +FROM ( + select key, value from ( + select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1 + UNION all + select 
s2.key as key, s2.value as value, 'tst1' as value2 from src s2) unionsub + UNION all + select key, value from src s0 + ) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) +GROUP BY unionsrc.key, unionsrc.value; + +explain analyze +FROM ( + select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1 + UNION all + select s2.key as key, s2.value as value, 'tst1' as value2 from src s2 + ) unionsrc +INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key +INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) +GROUP BY unionsrc.key, unionsrc.value; http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_3.q b/ql/src/test/queries/clientpositive/explainanalyze_3.q new file mode 100644 index 0000000..69f82e5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_3.q @@ -0,0 +1,158 @@ +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; +set hive.metastore.filter.hook=org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl; +set hive.mapred.mode=nonstrict; +set hive.explain.user=true; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=true; + +explain analyze select key, value +FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol; + +explain analyze show tables; + +explain analyze create database newDB location "/tmp/"; + +create database newDB location "/tmp/"; + +explain analyze describe database extended newDB; + +describe database extended newDB; + +explain analyze use newDB; + +use newDB; + +create table tab (name string); + +explain analyze alter table tab rename to newName; + +explain analyze drop table tab; + +drop table tab; + +explain analyze use default; + +use default; + +drop database newDB; + +explain analyze analyze table src compute statistics; + +explain analyze analyze table src compute statistics for columns; + +explain analyze +CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)); + +CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x)); + +explain analyze SELECT SIGMOID(2) FROM src LIMIT 1; +explain analyze DROP TEMPORARY MACRO SIGMOID; +DROP TEMPORARY MACRO SIGMOID; + +explain analyze create table src_autho_test as select * from src; +create table src_autho_test as select * from src; + +set hive.security.authorization.enabled=true; + +explain analyze grant select on table src_autho_test to user hive_test_user; +grant select on table src_autho_test to user hive_test_user; + +explain analyze show grant user hive_test_user on table src_autho_test; +explain analyze show grant user hive_test_user on table src_autho_test(key); + +select key from src_autho_test order by key limit 20; + +explain analyze revoke select on table src_autho_test from user hive_test_user; + +explain analyze grant select(key) on table src_autho_test to user hive_test_user; + +explain analyze revoke select(key) on table src_autho_test from user 
hive_test_user; + +explain analyze +create role sRc_roLE; + +create role sRc_roLE; + +explain analyze +grant role sRc_roLE to user hive_test_user; + +grant role sRc_roLE to user hive_test_user; + +explain analyze show role grant user hive_test_user; + +explain analyze drop role sRc_roLE; +drop role sRc_roLE; + +set hive.security.authorization.enabled=false; +drop table src_autho_test; + +explain analyze drop view v; + +explain analyze create view v as with cte as (select * from src order by key limit 5) +select * from cte; + +explain analyze with cte as (select * from src order by key limit 5) +select * from cte; + +create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc; + +load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=50000; +SET hive.optimize.index.filter=true; +set hive.merge.orcfile.stripe.level=false; +set hive.merge.tezfiles=false; +set hive.merge.mapfiles=false; +set hive.merge.mapredfiles=false; +set hive.compute.splits.in.am=true; +set tez.grouping.min-size=1000; +set tez.grouping.max-size=50000; + +set hive.merge.orcfile.stripe.level=true; +set hive.merge.tezfiles=true; +set hive.merge.mapfiles=true; +set hive.merge.mapredfiles=true; + +explain analyze insert overwrite table orc_merge5 select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13; + +drop table orc_merge5; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08'); + +load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + + + +set hive.optimize.bucketingsorting=false; +insert overwrite table tab_part partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin_part; + +CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +insert overwrite table tab partition (ds='2008-04-08') +select key,value from srcbucket_mapjoin; + +set hive.convert.join.bucket.mapjoin.tez = true; +explain analyze +select a.key, a.value, b.value +from tab a join tab_part b on a.key = b.key; + + + http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_4.q ---------------------------------------------------------------------- diff --git 
a/ql/src/test/queries/clientpositive/explainanalyze_4.q b/ql/src/test/queries/clientpositive/explainanalyze_4.q new file mode 100644 index 0000000..dad397b --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_4.q @@ -0,0 +1,103 @@ +set hive.mapred.mode=nonstrict; + +set hive.explain.user=true; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +-- First try with regular mergejoin +explain analyze +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain analyze +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain analyze +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=200000; +set hive.stats.fetch.column.stats=false; +set hive.exec.reducers.bytes.per.reducer=200000; + +-- Try with dynamically partitioned hashjoin +explain analyze +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +select + * +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +order by a.cint; + +explain analyze +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +select + count(*) +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null; + +explain analyze +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; + +select + a.csmallint, count(*) c1 +from alltypesorc a join alltypesorc b on a.cint = b.cint +where + a.cint between 1000000 and 3000000 and b.cbigint is not null +group by a.csmallint +order by c1; http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_5.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explainanalyze_5.q b/ql/src/test/queries/clientpositive/explainanalyze_5.q new file mode 100644 index 0000000..bb23e45 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explainanalyze_5.q @@ -0,0 +1,81 @@ +set hive.stats.column.autogather=true; + +explain analyze analyze table src compute statistics; + +explain analyze analyze table src compute statistics for columns; + +drop table src_multi2; + +create table src_multi2 like src; + +explain 
analyze insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +select count(*) from (select * from src union select * from src1)subq; + +insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key; + +describe formatted src_multi2; + + +set hive.mapred.mode=nonstrict; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +-- SORT_QUERY_RESULTS + +create table acid_uami(i int, + de decimal(5,2), + vc varchar(128)) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true'); + +insert into table acid_uami values + (1, 109.23, 'mary had a little lamb'), + (6553, 923.19, 'its fleece was white as snow'); + +insert into table acid_uami values + (10, 119.23, 'and everywhere that mary went'), + (65530, 823.19, 'the lamb was sure to go'); + +select * from acid_uami order by de; + +explain analyze update acid_uami set de = 3.14 where de = 109.23 or de = 119.23; + +select * from acid_uami order by de; + +update acid_uami set de = 3.14 where de = 109.23 or de = 119.23; + +select * from acid_uami order by de; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table; +dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/00000_0; + +create table acid_dot( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc location '${system:test.tmp.dir}/delete_orig_table' TBLPROPERTIES ('transactional'='true'); + +select count(*) from acid_dot; + +explain analyze delete from acid_dot where cint < -1070551679; + +select count(*) from acid_dot; + +delete from acid_dot where cint < -1070551679; + +select count(*) from acid_dot; + +dfs -rmr ${system:test.tmp.dir}/delete_orig_table; http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_partlvl.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out index f6f2bfa..7e2edd9 100644 --- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -46,18 +46,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16) keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 2000.0 (type: double) sort order: + Map-reduce partition columns: 2000.0 (type: double) + Statistics: Num rows: 26 Data size: 105 
Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) Reduce Operator Tree: Group By Operator @@ -65,11 +69,14 @@ STAGE PLANS: keys: 2000.0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -98,20 +105,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16) keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 2000.0 (type: double) null sort order: a sort order: + Map-reduce partition columns: 2000.0 (type: double) + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>) auto parallelism: false @@ -173,14 +184,17 @@ STAGE PLANS: keys: 2000.0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double) outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -232,18 +246,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 16) keys: 4000.0 (type: double) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 4000.0 (type: double) sort order: + Map-reduce partition columns: 4000.0 (type: double) + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -251,11 +269,14 @@ STAGE PLANS:
          keys: 4000.0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -284,20 +305,24 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: employeeid (type: int)
              outputColumnNames: employeeid
+              Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16)
                keys: 4000.0 (type: double)
                mode: hash
                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 4000.0 (type: double)
                  null sort order: a
                  sort order: +
                  Map-reduce partition columns: 4000.0 (type: double)
+                  Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                  tag: -1
                  value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
                  auto parallelism: false
@@ -359,14 +384,17 @@ STAGE PLANS:
          keys: 4000.0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
+              Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -418,18 +446,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeeid (type: int), employeename (type: string)
              outputColumnNames: employeeid, employeename
+              Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                keys: 2000.0 (type: double)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 2000.0 (type: double)
                  sort order: +
                  Map-reduce partition columns: 2000.0 (type: double)
+                  Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -437,11 +469,14 @@ STAGE PLANS:
          keys: 2000.0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -498,18 +533,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeesalary (type: double), employeeid (type: int), employeename (type: string)
              outputColumnNames: employeesalary, employeeid, employeename
+              Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                keys: employeesalary (type: double)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: double)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: double)
+                  Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -517,11 +556,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: double)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -580,23 +622,29 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeeid (type: int), employeename (type: string)
              outputColumnNames: employeeid, employeename
+              Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                mode: hash
                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 21089e1..47fffab 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -84,18 +84,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: country (type: string), employeename (type: string), employeeid (type: int)
              outputColumnNames: country, employeename, employeeid
+              Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16)
                keys: 4000.0 (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 4000.0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -103,11 +107,14 @@ STAGE PLANS:
          keys: 4000.0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -157,18 +164,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: country (type: string), employeeid (type: int)
              outputColumnNames: country, employeeid
+              Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16)
                keys: 2000.0 (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: 2000.0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -176,11 +187,14 @@ STAGE PLANS:
          keys: 2000.0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -241,18 +255,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: employeesalary (type: double), country (type: string), employeeid (type: int)
              outputColumnNames: employeesalary, country, employeeid
+              Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16)
                keys: employeesalary (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -260,11 +278,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -322,18 +343,22 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: employee_part
+            Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: employeesalary (type: double), country (type: string), employeeid (type: int), employeename (type: string)
              outputColumnNames: employeesalary, country, employeeid, employeename
+              Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
                keys: employeesalary (type: double), country (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: double), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+                  Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
@@ -341,11 +366,14 @@ STAGE PLANS:
          keys: KEY._col0 (type: double), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
+              Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_quoting.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
index 288e61b..52e3538 100644
--- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
@@ -24,23 +24,29 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: user_web_events
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: user id (type: bigint), user name (type: string)
              outputColumnNames: user id, user name
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(user id, 16), compute_stats(user name, 16)
                mode: hash
                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          mode: mergepartial
          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -75,23 +81,29 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: user_web_events
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: user id (type: bigint)
              outputColumnNames: user id
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: compute_stats(user id, 16)
                mode: hash
                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order: 
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0)
          mode: mergepartial
          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
