HIVE-18926: Improve operator-tree matching (Zoltan Haindrich reviewed by Ashutosh Chauhan)
Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a2394c5b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a2394c5b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a2394c5b Branch: refs/heads/master Commit: a2394c5bf2e5b257a3a1b3e2bda4a25b92005315 Parents: 1e884cc Author: Zoltan Haindrich <k...@rxd.hu> Authored: Tue Mar 27 13:00:44 2018 +0200 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Tue Mar 27 13:00:44 2018 +0200 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../java/org/apache/hadoop/hive/ql/Context.java | 10 +- .../java/org/apache/hadoop/hive/ql/Driver.java | 29 +- .../java/org/apache/hadoop/hive/ql/IDriver.java | 2 + .../apache/hadoop/hive/ql/exec/ExplainTask.java | 3 +- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 34 +- .../ql/optimizer/signature/OpSignature.java | 24 +- .../ql/optimizer/signature/OpTreeSignature.java | 33 +- .../hive/ql/optimizer/signature/Signature.java | 4 +- .../ql/optimizer/signature/SignatureUtils.java | 8 +- .../stats/annotation/StatsRulesProcFactory.java | 19 +- .../hive/ql/plan/CommonMergeJoinDesc.java | 2 - .../hadoop/hive/ql/plan/JoinCondDesc.java | 6 +- .../hive/ql/plan/LateralViewJoinDesc.java | 2 - .../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 5 - .../hadoop/hive/ql/plan/ReduceSinkDesc.java | 7 +- .../hive/ql/plan/mapper/EmptyStatsSource.java | 10 + .../hive/ql/plan/mapper/GroupTransformer.java | 4 +- .../hadoop/hive/ql/plan/mapper/PlanMapper.java | 79 ++++- .../hive/ql/plan/mapper/PlanMapperProcess.java | 47 --- .../hive/ql/plan/mapper/RuntimeStatsSource.java | 29 -- .../plan/mapper/SimpleRuntimeStatsSource.java | 10 +- .../hadoop/hive/ql/plan/mapper/StatsSource.java | 7 + .../hadoop/hive/ql/reexec/ReExecDriver.java | 14 +- .../hadoop/hive/ql/reexec/ReOptimizePlugin.java | 2 +- 
.../signature/TestOperatorSignature.java | 9 +- .../ql/plan/mapping/TestCounterMapping.java | 8 +- .../hive/ql/plan/mapping/TestOperatorCmp.java | 43 ++- .../ql/plan/mapping/TestReOptimization.java | 6 +- .../queries/clientpositive/explain_outputs.q | 15 + .../beeline/explain_outputs.q.out | 332 ++++++++++++++++++ .../clientpositive/explain_outputs.q.out | 338 +++++++++++++++++++ .../llap/retry_failure_stat_changes.q.out | 8 +- .../service/cli/operation/SQLOperation.java | 38 +-- 34 files changed, 960 insertions(+), 228 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 0d6aaae..5985dcf 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -901,6 +901,7 @@ beeline.positive.include=create_merge_compressed.q,\ colstats_all_nulls.q,\ drop_with_concurrency.q,\ escape_comments.q,\ + explain_outputs.q,\ insert_overwrite_local_directory_1.q,\ mapjoin2.q,\ materialized_view_create_rewrite.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/Context.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 58fa5f2..9ca8b00 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -28,7 +28,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Random; import java.util.Set; import 
java.util.concurrent.ConcurrentHashMap; @@ -61,7 +60,6 @@ import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; -import org.apache.hadoop.hive.ql.plan.mapper.RuntimeStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.wm.WmContext; @@ -161,7 +159,7 @@ public class Context { private boolean isExplainPlan = false; private PlanMapper planMapper = new PlanMapper(); - private RuntimeStatsSource runtimeStatsSource; + private StatsSource runtimeStatsSource; private int executionIndex; public void setOperation(Operation operation) { @@ -1049,14 +1047,10 @@ public class Context { return planMapper; } - public void setRuntimeStatsSource(RuntimeStatsSource runtimeStatsSource) { + public void setStatsSource(StatsSource runtimeStatsSource) { this.runtimeStatsSource = runtimeStatsSource; } - public Optional<RuntimeStatsSource> getRuntimeStatsSource() { - return Optional.ofNullable(runtimeStatsSource); - } - public StatsSource getStatsSource() { if (runtimeStatsSource != null) { return runtimeStatsSource; http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 667f436..ed3984e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -111,7 +111,7 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; -import 
org.apache.hadoop.hive.ql.plan.mapper.RuntimeStatsSource; +import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; @@ -194,7 +194,7 @@ public class Driver implements IDriver { // Transaction manager used for the query. This will be set at compile time based on // either initTxnMgr or from the SessionState, in that order. private HiveTxnManager queryTxnMgr; - private RuntimeStatsSource runtimeStatsSource; + private StatsSource statsSource; // Boolean to store information about whether valid txn list was generated // for current query. @@ -290,6 +290,10 @@ public class Driver implements IDriver { return schema; } + public Schema getExplainSchema() { + return new Schema(ExplainTask.getResultSchema(), null); + } + @Override public Context getContext() { return ctx; @@ -574,7 +578,7 @@ public class Driver implements IDriver { setTriggerContext(queryId); } - ctx.setRuntimeStatsSource(runtimeStatsSource); + ctx.setStatsSource(statsSource); ctx.setCmd(command); ctx.setHDFSCleanup(true); @@ -2690,8 +2694,23 @@ public class Driver implements IDriver { return hookRunner; } - public void setRuntimeStatsSource(RuntimeStatsSource runtimeStatsSource) { - this.runtimeStatsSource = runtimeStatsSource; + public void setStatsSource(StatsSource runtimeStatsSource) { + this.statsSource = runtimeStatsSource; } + @Override + public boolean hasResultSet() { + + // TODO explain should use a FetchTask for reading + for (Task<? 
extends Serializable> task : plan.getRootTasks()) { + if (task.getClass() == ExplainTask.class) { + return true; + } + } + if (plan.getFetchTask() != null && schema != null && schema.isSetFieldSchemas()) { + return true; + } else { + return false; + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/IDriver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/IDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/IDriver.java index 41737fb..bab5014 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/IDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/IDriver.java @@ -73,4 +73,6 @@ public interface IDriver extends CommandProcessor { Context getContext(); + boolean hasResultSet(); + } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 0a6e17a..14c6398 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -427,6 +427,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { Class itface = SessionState.get().getAuthorizerInterface(); Object authorizer = AuthorizationFactory.create(delegate, itface, new AuthorizationFactory.AuthorizationExceptionHandler() { + @Override public void exception(Exception exception) { exceptions.add(exception.getMessage()); } @@ -1225,7 +1226,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { return "EXPLAIN"; } - public List<FieldSchema> getResultSchema() { + public static List<FieldSchema> getResultSchema() { FieldSchema tmpFieldSchema = new FieldSchema(); List<FieldSchema> colList = new 
ArrayList<FieldSchema>(); http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index a235f3f..7121bce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -181,7 +181,7 @@ public class ConvertJoinMapJoin implements NodeProcessor { // map join operator by default has no bucket cols and num of reduce sinks // reduced by 1 mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); - mapJoinOp.setStatistics(joinOp.getStatistics()); + preserveOperatorInfos(mapJoinOp, joinOp, context); // propagate this change till the next RS for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) { setAllChildrenTraits(childOp, mapJoinOp.getOpTraits()); @@ -215,7 +215,9 @@ public class ConvertJoinMapJoin implements NodeProcessor { // Determine the size of small table inputs long totalSize = 0; for (int pos = 0; pos < joinOp.getParentOperators().size(); pos++) { - if (pos == mapJoinConversionPos) continue; + if (pos == mapJoinConversionPos) { + continue; + } Operator<? 
extends OperatorDesc> parentOp = joinOp.getParentOperators().get(pos); totalSize += parentOp.getStatistics().getDataSize(); } @@ -376,11 +378,12 @@ public class ConvertJoinMapJoin implements NodeProcessor { (CommonMergeJoinOperator) OperatorFactory.get(joinOp.getCompilationOpContext(), new CommonMergeJoinDesc(numBuckets, mapJoinConversionPos, mapJoinDesc), joinOp.getSchema()); + context.parseContext.getContext().getPlanMapper().link(joinOp, mergeJoinOp); int numReduceSinks = joinOp.getOpTraits().getNumReduceSinks(); OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numBuckets, joinOp.getOpTraits().getSortCols(), numReduceSinks); mergeJoinOp.setOpTraits(opTraits); - mergeJoinOp.setStatistics(joinOp.getStatistics()); + preserveOperatorInfos(mergeJoinOp, joinOp, context); for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) { int pos = parentOp.getChildOperators().indexOf(joinOp); @@ -500,7 +503,7 @@ public class ConvertJoinMapJoin implements NodeProcessor { opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks()); mapJoinOp.setOpTraits(opTraits); - mapJoinOp.setStatistics(joinOp.getStatistics()); + preserveOperatorInfos(mapJoinOp, joinOp, context); setNumberOfBucketsOnChildren(mapJoinOp); // Once the conversion is done, we can set the partitioner to bucket cols on the small table @@ -513,7 +516,9 @@ public class ConvertJoinMapJoin implements NodeProcessor { // use the positions to only pick the partitionCols which are required // on the small table side. 
for (Operator<?> op : mapJoinOp.getParentOperators()) { - if (!(op instanceof ReduceSinkOperator)) continue;; + if (!(op instanceof ReduceSinkOperator)) { + continue; + } ReduceSinkOperator rsOp = (ReduceSinkOperator) op; ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>(); @@ -534,6 +539,19 @@ public class ConvertJoinMapJoin implements NodeProcessor { return true; } + + /** + * Preserves additional informations about the operator. + * + * When an operator is replaced by a new one; some of the information of the old have to be retained. + */ + private void preserveOperatorInfos(Operator<?> newOp, Operator<?> oldOp, OptimizeTezProcContext context) { + newOp.setStatistics(oldOp.getStatistics()); + // linking these two operator declares that they are representing the same thing + // currently important because statistincs are actually gather for newOp; but the lookup is done using oldOp + context.parseContext.getContext().getPlanMapper().link(oldOp, newOp); + } + /* * This method tries to convert a join to an SMB. This is done based on * traits. If the sorted by columns are the same as the join columns then, we @@ -1058,7 +1076,9 @@ public class ConvertJoinMapJoin implements NodeProcessor { break; } } - if (found) break; + if (found) { + break; + } } } } @@ -1171,7 +1191,7 @@ public class ConvertJoinMapJoin implements NodeProcessor { null, joinOp.getOpTraits().getNumReduceSinks()); mapJoinOp.setOpTraits(opTraits); - mapJoinOp.setStatistics(joinOp.getStatistics()); + preserveOperatorInfos(mapJoinOp, joinOp, context); // propagate this change till the next RS for (Operator<? 
extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) { setAllChildrenTraits(childOp, mapJoinOp.getOpTraits()); http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpSignature.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpSignature.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpSignature.java index 90b2fd3..e87bbce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpSignature.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpSignature.java @@ -36,14 +36,10 @@ public class OpSignature { * Holds the signature of the operator; the keys are are the methods name marked by {@link Signature}. */ private Map<String, Object> sigMap; - // FIXME: this is currently retained... - // but later the signature should be able to serve the same comparision granulaty level as op.logicalEquals right now - private Operator<? extends OperatorDesc> op; private OpSignature(Operator<? extends OperatorDesc> op) { - this.op = op; sigMap = new HashMap<>(); - // FIXME: consider to operator info as well..not just conf? + // FIXME: consider other operator info as well..not just conf? 
SignatureUtils.write(sigMap, op.getConf()); } @@ -65,7 +61,7 @@ public class OpSignature { return true; } OpSignature o = (OpSignature) obj; - return op.logicalEquals(o.op); + return signatureCompare(o); } public boolean signatureCompare(OpSignature other) { @@ -91,4 +87,20 @@ public class OpSignature { } } + @Override + public String toString() { + return toString(""); + } + + public String toString(String pad) { + StringBuffer sb = new StringBuffer(); + for (Entry<String, Object> e : sigMap.entrySet()) { + sb.append(pad); + sb.append(e.getKey()); + sb.append(" = "); + sb.append(e.getValue()); + sb.append('\n'); + } + return sb.toString(); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpTreeSignature.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpTreeSignature.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpTreeSignature.java index c6d1a6a..c3dc848 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpTreeSignature.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/OpTreeSignature.java @@ -28,13 +28,11 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; * Operator tree signature. */ public class OpTreeSignature { - private Operator<?> op; private int hashCode; private OpSignature sig; private ArrayList<OpTreeSignature> parentSig; OpTreeSignature(Operator<?> op, OpTreeSignatureFactory osf) { - this.op = op; sig = OpSignature.of(op); parentSig = new ArrayList<>(); for (Operator<? 
extends OperatorDesc> parentOp : op.getParentOperators()) { @@ -65,26 +63,23 @@ public class OpTreeSignature { return true; } OpTreeSignature o = (OpTreeSignature) obj; - // TODO: this should be removed as soon as signatures are able to provide the same level of confidentiality as logicalEquals - return logicalEqualsTree(op, o.op); + + return sig.equals(o.sig) && parentSig.equals(o.parentSig); } - // XXX: this is ain't cheap! :) - private final boolean logicalEqualsTree(Operator<?> o1, Operator<?> o) { - if (!o1.logicalEquals(o)) { - return false; - } - if (o.getNumParent() != o1.getNumParent()) { - return false; - } - for (int i = 0; i < o1.getNumParent(); i++) { - Operator<? extends OperatorDesc> copL = o1.getParentOperators().get(i); - Operator<? extends OperatorDesc> copR = o.getParentOperators().get(i); - if (!copL.logicalEquals(copR)) { - return false; - } + @Override + public String toString() { + return toString(""); + } + + public String toString(String pad) { + StringBuffer sb = new StringBuffer(); + sb.append(pad + "hashcode:" + hashCode + "\n"); + sb.append(sig.toString(pad)); + for (OpTreeSignature p : parentSig) { + sb.append(p.toString(pad + " ")); } - return true; + return sb.toString(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/Signature.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/Signature.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/Signature.java index c228a8e..12dc004 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/Signature.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/Signature.java @@ -28,9 +28,7 @@ import java.lang.annotation.Target; * * The signature is used to enable logical level tree comparisions between operator trees. 
*/ -@Target(value = ElementType.METHOD) +@Target(value = { ElementType.METHOD, ElementType.TYPE }) @Retention(RetentionPolicy.RUNTIME) public @interface Signature { - - } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/SignatureUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/SignatureUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/SignatureUtils.java index 2269322..4f3e338 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/SignatureUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/signature/SignatureUtils.java @@ -48,6 +48,8 @@ public class SignatureUtils { private List<Method> sigMethods; + private String classLabel; + public SignatureMapper(Class<?> o) { Method[] f = o.getMethods(); sigMethods = new ArrayList<>(); @@ -61,15 +63,17 @@ public class SignatureUtils { sigMethods.add(method); } } + + classLabel = o.getName(); } public void write(Map<String, Object> ret, Object o) { if (sigMethods.isEmpty()) { // by supplying using "o" this enforces identity/equls matching // which will most probably make the signature very unique - ret.put(o.getClass().getName(), o); + ret.put(classLabel, o); } else { - ret.put(o.getClass().getName(), "1"); + ret.put(classLabel, "1"); for (Method method : sigMethods) { try { Object res = method.invoke(o); http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 22b052c..345595b 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -32,6 +32,7 @@ import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -53,6 +54,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; import org.apache.hadoop.hive.ql.parse.ColumnStatsList; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -75,7 +77,8 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.Statistics.State; -import org.apache.hadoop.hive.ql.plan.mapper.RuntimeStatsSource; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; +import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.stats.OperatorStats; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -2493,12 +2496,20 @@ public class StatsRulesProcFactory { private static Statistics applyRuntimeStats(Context context, Statistics stats, Operator<?> op) { - if (!context.getRuntimeStatsSource().isPresent()) { + if (!((HiveConf) context.getConf()).getBoolVar(ConfVars.HIVE_QUERY_REEXECUTION_ENABLED)) { return stats; } - RuntimeStatsSource rss = context.getRuntimeStatsSource().get(); - Optional<OperatorStats> os = 
rss.lookup(op); + PlanMapper pm = context.getPlanMapper(); + OpTreeSignature treeSig = pm.getSignatureOf(op); + pm.link(op, treeSig); + + StatsSource statsSource = context.getStatsSource(); + if (!statsSource.canProvideStatsFor(op.getClass())) { + return stats; + } + + Optional<OperatorStats> os = statsSource.lookup(treeSig); if (!os.isPresent()) { return stats; http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java index 5a81add..315bcdc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CommonMergeJoinDesc.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; -import org.apache.hadoop.hive.ql.optimizer.signature.Signature; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -40,7 +39,6 @@ public class CommonMergeJoinDesc extends MapJoinDesc implements Serializable { this.mapJoinConversionPos = mapJoinConversionPos; } - @Signature public int getNumBuckets() { return numBuckets; } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java index ea22131..3820d57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java @@ -27,6 +27,7 @@ import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Objects; /** * Join conditions Descriptor implementation. 
@@ -201,5 +202,8 @@ public class JoinCondDesc implements Serializable { return true; } - // XXX: is hashCode missing here? + @Override + public int hashCode() { + return Objects.hashCode(type, left, right, preserved); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java index 85a4683..97aa8ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.plan; import java.util.ArrayList; import java.util.Objects; -import org.apache.hadoop.hive.ql.optimizer.signature.Signature; import org.apache.hadoop.hive.ql.plan.Explain.Level; @@ -50,7 +49,6 @@ public class LateralViewJoinDesc extends AbstractOperatorDesc { } @Explain(displayName = "outputColumnNames") - @Signature public ArrayList<String> getOutputInternalColNames() { return outputInternalColNames; } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 91ea159..54b705d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -33,7 +33,6 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; -import org.apache.hadoop.hive.ql.optimizer.signature.Signature; import org.apache.hadoop.hive.ql.plan.Explain.Level; import 
org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; @@ -140,7 +139,6 @@ public class MapJoinDesc extends JoinDesc implements Serializable { } @Explain(displayName = "input vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - @Signature public Map<Integer, String> getParentToInput() { return parentToInput; } @@ -158,7 +156,6 @@ public class MapJoinDesc extends JoinDesc implements Serializable { } @Explain(displayName = "Estimated key counts", explainLevels = { Level.EXTENDED }) - @Signature public String getKeyCountsExplainDesc() { StringBuilder result = null; for (Map.Entry<Integer, Long> entry : parentKeyCounts.entrySet()) { @@ -253,7 +250,6 @@ public class MapJoinDesc extends JoinDesc implements Serializable { * @return the position of the big table not in memory */ @Explain(displayName = "Position of Big Table", explainLevels = { Level.EXTENDED }) - @Signature public int getPosBigTable() { return posBigTable; } @@ -344,7 +340,6 @@ public class MapJoinDesc extends JoinDesc implements Serializable { } @Explain(displayName = "BucketMapJoin", explainLevels = { Level.USER, Level.EXTENDED }, displayOnlyOnTrue = true) - @Signature public boolean isBucketMapJoin() { return isBucketMapJoin; } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index f2955af..aa3c72b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -203,11 +203,11 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { } @Explain(displayName = "key expressions") + @Signature public String getKeyColString() { return 
PlanUtils.getExprListString(keyCols); } - @Signature public java.util.ArrayList<ExprNodeDesc> getKeyCols() { return keyCols; } @@ -225,11 +225,11 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { } @Explain(displayName = "value expressions") + @Signature public String getValueColsString() { return PlanUtils.getExprListString(valueCols); } - @Signature public java.util.ArrayList<ExprNodeDesc> getValueCols() { return valueCols; } @@ -244,11 +244,11 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { } @Explain(displayName = "PartitionCols", explainLevels = { Level.USER }) + @Signature public String getUserLevelExplainParitionColsString() { return PlanUtils.getExprListString(partitionCols, true); } - @Signature public java.util.ArrayList<ExprNodeDesc> getPartitionCols() { return partitionCols; } @@ -444,7 +444,6 @@ public class ReduceSinkDesc extends AbstractOperatorDesc { return forwarding; } - @Signature @Explain(displayName = "auto parallelism", explainLevels = { Level.EXTENDED }) public final boolean isAutoParallel() { return (this.reduceTraits.contains(ReducerTraits.AUTOPARALLEL)); http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/EmptyStatsSource.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/EmptyStatsSource.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/EmptyStatsSource.java index 57762ed..72092ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/EmptyStatsSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/EmptyStatsSource.java @@ -18,6 +18,11 @@ package org.apache.hadoop.hive.ql.plan.mapper; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; +import org.apache.hadoop.hive.ql.stats.OperatorStats; + public class EmptyStatsSource implements StatsSource { @Override @@ -25,4 +30,9 @@ public class 
EmptyStatsSource implements StatsSource { return false; } + @Override + public Optional<OperatorStats> lookup(OpTreeSignature treeSig) { + return Optional.empty(); + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/GroupTransformer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/GroupTransformer.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/GroupTransformer.java index 7b9e99e..b6b77e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/GroupTransformer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/GroupTransformer.java @@ -18,8 +18,8 @@ package org.apache.hadoop.hive.ql.plan.mapper; -import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.LinkGroup; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup; public interface GroupTransformer { - void map(LinkGroup group); + void map(EquivGroup group); } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java index 36d7e58..a372804 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java @@ -19,8 +19,10 @@ package org.apache.hadoop.hive.ql.plan.mapper; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.IdentityHashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -28,6 +30,9 @@ import java.util.NoSuchElementException; import java.util.Objects; import java.util.Set; +import 
org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; +import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory; import com.google.common.annotations.VisibleForTesting; /** @@ -37,13 +42,28 @@ import com.google.common.annotations.VisibleForTesting; */ public class PlanMapper { - Set<LinkGroup> groups = new HashSet<>(); - private Map<Object, LinkGroup> objectMap = new HashMap<>(); - - public class LinkGroup { + Set<EquivGroup> groups = new HashSet<>(); + private Map<Object, EquivGroup> objectMap = new HashMap<>(); + + /** + * A set of objects which are representing the same thing. + * + * A Group may contain different kind of things which are connected by their purpose; + * For example currently a group may contain the following objects: + * <ul> + * <li> Operator(s) - which are doing the actual work; + * there might be more than one, since an optimization may replace an operator with a new one + * <li> Signature - to enable inter-plan look up of the same data + * <li> OperatorStats - collected runtime information + * <ul> + */ + public class EquivGroup { Set<Object> members = new HashSet<>(); public void add(Object o) { + if (members.contains(o)) { + return; + } members.add(o); objectMap.put(o, this); } @@ -60,34 +80,64 @@ public class PlanMapper { } } + /** + * States that the two objects are representing the same. + * + * For example if during an optimization Operator_A is replaced by a specialized Operator_A1; + * then those two can be linked. 
+ */ public void link(Object o1, Object o2) { - LinkGroup g1 = objectMap.get(o1); - LinkGroup g2 = objectMap.get(o2); - if (g1 != null && g2 != null && g1 != g2) { + + Set<Object> keySet = Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>()); + keySet.add(o1); + keySet.add(o2); + keySet.add(getKeyFor(o1)); + keySet.add(getKeyFor(o2)); + + Set<EquivGroup> mGroups = Collections.newSetFromMap(new IdentityHashMap<EquivGroup, Boolean>()); + + for (Object object : keySet) { + EquivGroup group = objectMap.get(object); + if (group != null) { + mGroups.add(group); + } + } + if (mGroups.size() > 1) { throw new RuntimeException("equivalence mapping violation"); } - LinkGroup targetGroup = (g1 != null) ? g1 : (g2 != null ? g2 : new LinkGroup()); + EquivGroup targetGroup = mGroups.isEmpty() ? new EquivGroup() : mGroups.iterator().next(); groups.add(targetGroup); targetGroup.add(o1); targetGroup.add(o2); + + } + + private OpTreeSignatureFactory signatureCache = OpTreeSignatureFactory.newCache(); + + private Object getKeyFor(Object o) { + if (o instanceof Operator) { + Operator operator = (Operator) o; + return signatureCache.getSignature(operator); + } + return o; } public <T> List<T> getAll(Class<T> clazz) { List<T> ret = new ArrayList<>(); - for (LinkGroup g : groups) { + for (EquivGroup g : groups) { ret.addAll(g.getAll(clazz)); } return ret; } public void runMapper(GroupTransformer mapper) { - for (LinkGroup equivGroup : groups) { + for (EquivGroup equivGroup : groups) { mapper.map(equivGroup); } } public <T> List<T> lookupAll(Class<T> clazz, Object key) { - LinkGroup group = objectMap.get(key); + EquivGroup group = objectMap.get(key); if (group == null) { throw new NoSuchElementException(Objects.toString(key)); } @@ -104,9 +154,14 @@ public class PlanMapper { } @VisibleForTesting - public Iterator<LinkGroup> iterateGroups() { + public Iterator<EquivGroup> iterateGroups() { return groups.iterator(); } + public OpTreeSignature getSignatureOf(Operator<?> op) { + 
OpTreeSignature sig = signatureCache.getSignature(op); + return sig; + } + } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapperProcess.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapperProcess.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapperProcess.java deleted file mode 100644 index 424dd79..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapperProcess.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.plan.mapper; - -import java.util.List; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; -import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory; -import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.LinkGroup; - -public class PlanMapperProcess { - - private static class OpTreeSignatureMapper implements GroupTransformer { - - private OpTreeSignatureFactory cache = OpTreeSignatureFactory.newCache(); - - @Override - public void map(LinkGroup group) { - List<Operator> operators= group.getAll(Operator.class); - for (Operator op : operators) { - group.add(OpTreeSignature.of(op,cache)); - } - } - } - - public static void runPostProcess(PlanMapper planMapper) { - planMapper.runMapper(new OpTreeSignatureMapper()); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/RuntimeStatsSource.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/RuntimeStatsSource.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/RuntimeStatsSource.java deleted file mode 100644 index 21a0678..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/RuntimeStatsSource.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.plan.mapper; - -import java.util.Optional; - -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.stats.OperatorStats; - -public interface RuntimeStatsSource extends StatsSource { - public Optional<OperatorStats> lookup(Operator<?> tsop); - -} http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/SimpleRuntimeStatsSource.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/SimpleRuntimeStatsSource.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/SimpleRuntimeStatsSource.java index 6f340b8..b5a3c24 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/SimpleRuntimeStatsSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/SimpleRuntimeStatsSource.java @@ -23,24 +23,21 @@ import java.util.NoSuchElementException; import java.util.Optional; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; import org.apache.hadoop.hive.ql.stats.OperatorStats; -public class SimpleRuntimeStatsSource implements RuntimeStatsSource { +public class SimpleRuntimeStatsSource implements StatsSource { private final PlanMapper pm; public SimpleRuntimeStatsSource(PlanMapper pm) { - PlanMapperProcess.runPostProcess(pm); this.pm = pm; } @Override - public Optional<OperatorStats> lookup(Operator<?> op) { + 
public Optional<OperatorStats> lookup(OpTreeSignature sig) { try { - OpTreeSignature sig = OpTreeSignature.of(op); List<OperatorStats> v = pm.lookupAll(OperatorStats.class, sig); if (v.size() > 0) { return Optional.of(v.get(0)); @@ -56,9 +53,6 @@ public class SimpleRuntimeStatsSource implements RuntimeStatsSource { if (Operator.class.isAssignableFrom(class1)) { return true; } - if (HiveFilter.class.isAssignableFrom(class1)) { - return true; - } return false; } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSource.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSource.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSource.java index a4cb6e9..df5aa0c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSource.java @@ -18,8 +18,15 @@ package org.apache.hadoop.hive.ql.plan.mapper; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; +import org.apache.hadoop.hive.ql.stats.OperatorStats; + public interface StatsSource { boolean canProvideStatsFor(Class<?> class1); + Optional<OperatorStats> lookup(OpTreeSignature treeSig); + } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecDriver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecDriver.java index 9303171..8a5595d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecDriver.java @@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook; import 
org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; -import org.apache.hadoop.hive.ql.plan.mapper.SimpleRuntimeStatsSource; +import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -227,6 +227,9 @@ public class ReExecDriver implements IDriver { @Override public Schema getSchema() { + if(explainReOptimization) { + return coreDriver.getExplainSchema(); + } return coreDriver.getSchema(); } @@ -256,8 +259,13 @@ public class ReExecDriver implements IDriver { } @VisibleForTesting - public void setRuntimeStatsSource(SimpleRuntimeStatsSource statsSource) { - coreDriver.setRuntimeStatsSource(statsSource); + public void setStatsSource(StatsSource statsSource) { + coreDriver.setStatsSource(statsSource); + } + + @Override + public boolean hasResultSet() { + return explainReOptimization || coreDriver.hasResultSet(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReOptimizePlugin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReOptimizePlugin.java b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReOptimizePlugin.java index 7078587..f731315 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReOptimizePlugin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReOptimizePlugin.java @@ -91,7 +91,7 @@ public class ReOptimizePlugin implements IReExecutionPlugin { public void prepareToReExecute() { statsReaderHook.setCollectOnSuccess(true); PlanMapper pm = coreDriver.getContext().getPlanMapper(); - coreDriver.setRuntimeStatsSource(new SimpleRuntimeStatsSource(pm)); + coreDriver.setStatsSource(new SimpleRuntimeStatsSource(pm)); retryPossible = false; } 
http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java index 8c899e7..0afc533 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/signature/TestOperatorSignature.java @@ -94,7 +94,7 @@ public class TestOperatorSignature { checkNotEquals(t1, t2); } - private void checkEquals(Operator<?> o1, Operator<?> o2) { + public static void checkEquals(Operator<?> o1, Operator<?> o2) { assertTrue(o1.logicalEquals(o2)); OpSignature s1 = OpSignature.of(o1); OpSignature s2 = OpSignature.of(o2); @@ -106,7 +106,7 @@ public class TestOperatorSignature { } - private void checkNotEquals(Operator<? extends OperatorDesc> o1, Operator<? extends OperatorDesc> o2) { + public static void checkNotEquals(Operator<? extends OperatorDesc> o1, Operator<? extends OperatorDesc> o2) { assertFalse(o1.logicalEquals(o2)); OpSignature s1 = OpSignature.of(o1); OpSignature s2 = OpSignature.of(o2); @@ -117,7 +117,7 @@ public class TestOperatorSignature { assertNotEquals(s1, s2); } - private void checkTreeEquals(Operator<?> o1, Operator<?> o2) { + public static void checkTreeEquals(Operator<?> o1, Operator<?> o2) { OpTreeSignature ts1 = OpTreeSignature.of(o1); OpTreeSignature ts2 = OpTreeSignature.of(o2); @@ -125,12 +125,13 @@ public class TestOperatorSignature { assertEquals(ts1, ts2); } - private void checkTreeNotEquals(Operator<? extends OperatorDesc> o1, Operator<? extends OperatorDesc> o2) { + public static void checkTreeNotEquals(Operator<? extends OperatorDesc> o1, Operator<? 
extends OperatorDesc> o2) { OpTreeSignature ts1 = OpTreeSignature.of(o1); OpTreeSignature ts2 = OpTreeSignature.of(o2); assertNotEquals(ts1.hashCode(), ts2.hashCode()); + ts1.equals(ts2); assertNotEquals(ts1, ts2); } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java index 9fe95e4..18aeb33 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java @@ -36,7 +36,7 @@ import org.apache.hadoop.hive.ql.parse.ParseException; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; import org.apache.hadoop.hive.ql.plan.mapper.SimpleRuntimeStatsSource; import org.apache.hadoop.hive.ql.reexec.ReExecDriver; -import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.LinkGroup; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.OperatorStats; import org.apache.hadoop.hive.ql.stats.OperatorStatsReaderHook; @@ -129,7 +129,7 @@ public class TestCounterMapping { FilterOperator filter1 = filters1.get(0); driver = createDriver(); - ((ReExecDriver) driver).setRuntimeStatsSource(new SimpleRuntimeStatsSource(pm1)); + ((ReExecDriver) driver).setStatsSource(new SimpleRuntimeStatsSource(pm1)); PlanMapper pm2 = getMapperForQuery(driver, query); @@ -149,10 +149,10 @@ public class TestCounterMapping { PlanMapper pm0 = getMapperForQuery(driver, "select sum(tu.id_uv),sum(u) from tu join tv on (tu.id_uv = tv.id_uv) where u>1 and v>1"); - Iterator<LinkGroup> itG = pm0.iterateGroups(); + Iterator<EquivGroup> itG = pm0.iterateGroups(); int checkedOperators = 0; while 
(itG.hasNext()) { - LinkGroup g = itG.next(); + EquivGroup g = itG.next(); List<HiveFilter> hfs = g.getAll(HiveFilter.class); List<OperatorStats> oss = g.getAll(OperatorStats.class); List<FilterOperator> fos = g.getAll(FilterOperator.class); http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestOperatorCmp.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestOperatorCmp.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestOperatorCmp.java index cfb0ca3..7abc2e7 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestOperatorCmp.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestOperatorCmp.java @@ -27,12 +27,15 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.DriverFactory; import org.apache.hadoop.hive.ql.IDriver; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.optimizer.signature.TestOperatorSignature; import org.apache.hadoop.hive.ql.parse.ParseException; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; -import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.LinkGroup; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.OperatorStatsReaderHook; import org.apache.hive.testutils.HiveTestEnvSetup; @@ -99,11 +102,15 @@ public class TestOperatorCmp { String query = "select u from tu where id_uv = 1 union all select v from tv where id_uv = 1"; PlanMapper pm = getMapperForQuery(driver, query); - Iterator<LinkGroup> itG = 
pm.iterateGroups(); + Iterator<EquivGroup> itG = pm.iterateGroups(); List<FilterOperator> fos = pm.getAll(FilterOperator.class); - assertEquals(2, fos.size()); + // the same operator is present 2 times + fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed()); + assertEquals(4, fos.size()); - assertFalse("logicalEquals", compareOperators(fos.get(0), fos.get(1))); + assertTrue("logicalEquals", compareOperators(fos.get(0), fos.get(1))); + assertFalse("logicalEquals", compareOperators(fos.get(0), fos.get(2))); + assertTrue("logicalEquals", compareOperators(fos.get(2), fos.get(3))); } @@ -121,7 +128,7 @@ public class TestOperatorCmp { } @Test - public void testDifferentFiltersAreNotMatched1() throws ParseException { + public void testDifferentFiltersAreNotMatched() throws ParseException { IDriver driver = createDriver(); PlanMapper pm0 = getMapperForQuery(driver, "select u from tu where id_uv = 1 group by u"); PlanMapper pm1 = getMapperForQuery(driver, "select u from tu where id_uv = 2 group by u"); @@ -132,7 +139,7 @@ public class TestOperatorCmp { } @Test - public void testSameFiltersMatched1() throws ParseException, Exception { + public void testSameFiltersMatched() throws ParseException, Exception { IDriver driver = createDriver(); PlanMapper pm0 = getMapperForQuery(driver, "select u from tu where id_uv = 1 group by u"); PlanMapper pm1 = getMapperForQuery(driver, "select u from tu where id_uv = 1 group by u"); @@ -141,6 +148,18 @@ public class TestOperatorCmp { assertHelper(AssertHelperOp.SAME, pm0, pm1, TableScanOperator.class); } + @Test + public void testSameJoinMatched() throws ParseException, Exception { + IDriver driver = createDriver(); + PlanMapper pm0 = + getMapperForQuery(driver, "select u,v from tu,tv where tu.id_uv = tv.id_uv and u>1 and v<10 group by u,v"); + PlanMapper pm1 = + getMapperForQuery(driver, "select u,v from tu,tv where tu.id_uv = tv.id_uv and u>1 and v<10 group by u,v"); + + assertHelper(AssertHelperOp.SAME, pm0, pm1, 
CommonMergeJoinOperator.class); + assertHelper(AssertHelperOp.SAME, pm0, pm1, JoinOperator.class); + // assertHelper(AssertHelperOp.SAME, pm0, pm1, TableScanOperator.class); + } enum AssertHelperOp { SAME, NOT_SAME @@ -152,10 +171,16 @@ public class TestOperatorCmp { assertEquals(1, fos0.size()); assertEquals(1, fos1.size()); + T opL = fos0.get(0); + T opR = fos1.get(0); if (same == AssertHelperOp.SAME) { - assertTrue(clazz + " " + same, compareOperators(fos0.get(0), fos1.get(0))); + assertTrue(clazz + " " + same, compareOperators(opL, opR)); + TestOperatorSignature.checkEquals(opL, opR); + TestOperatorSignature.checkTreeEquals(opL, opR); } else { - assertFalse(clazz + " " + same, compareOperators(fos0.get(0), fos1.get(0))); + assertFalse(clazz + " " + same, compareOperators(opL, opR)); + TestOperatorSignature.checkNotEquals(opL, opR); + TestOperatorSignature.checkTreeNotEquals(opL, opR); } } @@ -181,6 +206,4 @@ public class TestOperatorCmp { IDriver driver = DriverFactory.newDriver(conf); return driver; } - - } http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestReOptimization.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestReOptimization.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestReOptimization.java index 6d7bb07..b726300 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestReOptimization.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestReOptimization.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hive.ql.IDriver; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; -import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.LinkGroup; +import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup; import 
org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.OperatorStats; @@ -101,10 +101,10 @@ public class TestReOptimization { String query = "select assert_true_oom(${hiveconf:zzz} > sum(u*v)) from tu join tv on (tu.id_uv=tv.id_uv) where u<10 and v>1"; PlanMapper pm = getMapperForQuery(driver, query); - Iterator<LinkGroup> itG = pm.iterateGroups(); + Iterator<EquivGroup> itG = pm.iterateGroups(); int checkedOperators = 0; while (itG.hasNext()) { - LinkGroup g = itG.next(); + EquivGroup g = itG.next(); List<FilterOperator> fos = g.getAll(FilterOperator.class); List<OperatorStats> oss = g.getAll(OperatorStats.class); // FIXME: oss seems to contain duplicates http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/queries/clientpositive/explain_outputs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explain_outputs.q b/ql/src/test/queries/clientpositive/explain_outputs.q new file mode 100644 index 0000000..d53b66e --- /dev/null +++ b/ql/src/test/queries/clientpositive/explain_outputs.q @@ -0,0 +1,15 @@ + +create table t1 (id int); +create table t2 (id int); + +insert into t1 values (1),(10); +insert into t2 values (1),(2),(3),(4),(5); + +explain +select sum(t1.id) from t1 join t2 on (t1.id=t2.id); + +explain analyze +select sum(t1.id) from t1 join t2 on (t1.id=t2.id); + +explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id); http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/results/clientpositive/beeline/explain_outputs.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/beeline/explain_outputs.q.out b/ql/src/test/results/clientpositive/beeline/explain_outputs.q.out new file mode 100644 index 0000000..e771a9a --- /dev/null +++ 
b/ql/src/test/results/clientpositive/beeline/explain_outputs.q.out @@ -0,0 +1,332 @@ +PREHOOK: query: create table t1 (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t1 values (1),(10) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 values (1),(10) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.id SCRIPT [] +PREHOOK: query: insert into t2 values (1),(2),(3),(4),(5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values (1),(2),(3),(4),(5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.id SCRIPT [] +PREHOOK: query: explain +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: t2 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: t2 + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5/1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: explain reoptimization +select 
sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +POSTHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: t2 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/a2394c5b/ql/src/test/results/clientpositive/explain_outputs.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/explain_outputs.q.out b/ql/src/test/results/clientpositive/explain_outputs.q.out new file mode 100644 index 0000000..6071fa7 --- /dev/null +++ b/ql/src/test/results/clientpositive/explain_outputs.q.out @@ -0,0 +1,338 @@ +PREHOOK: query: create table t1 (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (id int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (id int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t1 values (1),(10) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 values (1),(10) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.id SCRIPT [] +PREHOOK: query: insert into t2 values (1),(2),(3),(4),(5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values (1),(2),(3),(4),(5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.id SCRIPT [] +PREHOOK: query: explain +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan + 
alias: t2 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain analyze +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2/2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: t2 + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5/5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5/1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +PREHOOK: type: QUERY +POSTHOOK: query: explain reoptimization +select sum(t1.id) from t1 join t2 on (t1.id=t2.id) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: t2 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + 
mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +