HIVE-20090 : Extend creation of semijoin reduction filters to be able to discover new opportunities (Jesus Camacho Rodriguez via Deepak Jaiswal)
Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ab9e954d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ab9e954d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ab9e954d Branch: refs/heads/master-txnstats Commit: ab9e954d478ca0e117b04843ab645f2861e5c925 Parents: bf54424 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Wed Jul 4 14:05:00 2018 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Fri Jul 13 22:15:23 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/parse/TezCompiler.java | 444 +++++++---- .../hive/ql/ppd/SyntheticJoinPredicate.java | 174 ++++- .../dynamic_semijoin_reduction_sw2.q | 59 ++ .../llap/dynamic_semijoin_reduction_sw2.q.out | 450 +++++++++++ .../clientpositive/llap/explainuser_1.q.out | 12 +- .../llap/tez_fixed_bucket_pruning.q.out | 8 +- .../clientpositive/perf/tez/query1.q.out | 76 +- .../clientpositive/perf/tez/query16.q.out | 118 +-- .../clientpositive/perf/tez/query17.q.out | 197 ++--- .../clientpositive/perf/tez/query18.q.out | 124 +-- .../clientpositive/perf/tez/query2.q.out | 116 +-- .../clientpositive/perf/tez/query23.q.out | 444 +++++------ .../clientpositive/perf/tez/query24.q.out | 252 +++--- .../clientpositive/perf/tez/query25.q.out | 188 ++--- .../clientpositive/perf/tez/query29.q.out | 148 ++-- .../clientpositive/perf/tez/query31.q.out | 322 ++++---- .../clientpositive/perf/tez/query32.q.out | 140 ++-- .../clientpositive/perf/tez/query39.q.out | 94 +-- .../clientpositive/perf/tez/query40.q.out | 92 +-- .../clientpositive/perf/tez/query54.q.out | 246 +++--- .../clientpositive/perf/tez/query59.q.out | 134 ++-- .../clientpositive/perf/tez/query64.q.out | 760 ++++++++++--------- .../clientpositive/perf/tez/query69.q.out | 144 ++-- .../clientpositive/perf/tez/query72.q.out | 178 ++--- .../clientpositive/perf/tez/query77.q.out | 248 +++--- .../clientpositive/perf/tez/query78.q.out | 136 ++-- .../clientpositive/perf/tez/query80.q.out | 336 ++++---- .../clientpositive/perf/tez/query91.q.out | 74 +- .../clientpositive/perf/tez/query92.q.out | 174 ++--- .../clientpositive/perf/tez/query94.q.out | 118 +-- .../clientpositive/perf/tez/query95.q.out | 241 +++--- .../spark_dynamic_partition_pruning_3.q.out | 3 +- 34 files changed, 3548 insertions(+), 2707 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 6ea68c3..41fae36 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3752,6 +3752,10 @@ public class HiveConf extends Configuration { "When dynamic pruning is enabled, joins on partition keys will be processed by sending\n" + "events from the processing vertices to the Tez application master. These events will be\n" + "used to prune unnecessary partitions."), + TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED("hive.tez.dynamic.partition.pruning.extended", true, + "Whether we should try to create additional opportunities for dynamic pruning, e.g., considering\n" + + "siblings that may not be created by normal dynamic pruning logic.\n" + + "Only works when dynamic pruning is enabled."), TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE("hive.tez.dynamic.partition.pruning.max.event.size", 1*1024*1024L, "Maximum size of events sent by processors in dynamic pruning. If this size is crossed no pruning will take place."), http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 4001b9f..d08528f 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -894,6 +894,7 @@ minillaplocal.query.files=\ unionDistinct_3.q,\ vectorized_join46.q,\ vectorized_multi_output_select.q,\ + dynamic_semijoin_reduction_sw2.q,\ partialdhj.q,\ stats_date.q,\ dst.q http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 119aa92..1b433c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -169,44 +169,7 @@ public class TezCompiler extends TaskCompiler { } perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection"); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - markSemiJoinForDPP(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); - - // Removing semijoin optimization when it may not be beneficial - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemijoinOptimizationByBenefit(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove any parallel edge between semijoin and mapjoin. - removeSemijoinsParallelToMapJoin(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove semijoin optimization if it creates a cycle with mapside joins - removeSemiJoinCyclesDueToMapsideJoins(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove semijoin optimization if SMB join is created. - removeSemijoinOptimizationFromSMBJoins(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove bloomfilter if no stats generated - removeSemiJoinIfNoStats(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // after the stats phase we might have some cyclic dependencies that we need - // to take care of. - runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + semijoinRemovalBasedTransformations(procCtx, inputs, outputs); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); if(procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) { @@ -230,11 +193,6 @@ public class TezCompiler extends TaskCompiler { private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException { - - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) { - return; - } - boolean cycleFree = false; while (!cycleFree) { cycleFree = true; @@ -454,6 +412,80 @@ public class TezCompiler extends TaskCompiler { ogw.startWalking(topNodes, null); } + private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, + Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException { + PerfLogger perfLogger = SessionState.getPerfLogger(); + + final boolean dynamicPartitionPruningEnabled = + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING); + final boolean semiJoinReductionEnabled = dynamicPartitionPruningEnabled && + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION); + final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled && + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (dynamicPartitionPruningEnabled) { + runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled) { + markSemiJoinForDPP(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); + + // Removing semijoin optimization when it may not be beneficial + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled) { + removeSemijoinOptimizationByBenefit(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); + + // Remove any parallel edge between semijoin and mapjoin. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled) { + removeSemijoinsParallelToMapJoin(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin"); + + // Remove semijoin optimization if it creates a cycle with mapside joins + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + removeSemiJoinCyclesDueToMapsideJoins(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join"); + + // Remove semijoin optimization if SMB join is created. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + removeSemijoinOptimizationFromSMBJoins(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); + + // Remove bloomfilter if no stats generated + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + removeSemiJoinIfNoStats(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); + + // after the stats phase we might have some cyclic dependencies that we need + // to take care of. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (dynamicPartitionPruningEnabled) { + runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + + // remove redundant dpp and semijoins + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (extendedReductionEnabled) { + removeRedundantSemijoinAndDpp(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove redundant semijoin reduction"); + } + private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException { @@ -739,11 +771,6 @@ public class TezCompiler extends TaskCompiler { private static void removeSemijoinOptimizationFromSMBJoins( OptimizeTezProcContext procCtx) throws SemanticException { - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) { - return; - } - Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put( new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + @@ -825,11 +852,6 @@ public class TezCompiler extends TaskCompiler { private static void removeSemiJoinCyclesDueToMapsideJoins( OptimizeTezProcContext procCtx) throws SemanticException { - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) { - return; - } - Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put( new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" + @@ -914,99 +936,18 @@ public class TezCompiler extends TaskCompiler { } } - private static class SemiJoinRemovalIfNoStatsProc implements NodeProcessor { - - @Override - public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - assert nd instanceof ReduceSinkOperator; - ReduceSinkOperator rs = (ReduceSinkOperator) nd; - ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext; - SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); - if (sjInfo == null) { - // nothing to do here. - return null; - } - - // This is a semijoin branch. The stack should look like, - // <Parent Ops>-SEL-GB1-RS1-GB2-RS2 - GroupByOperator gbOp = (GroupByOperator) (stack.get(stack.size() - 2)); - GroupByDesc gbDesc = gbOp.getConf(); - ArrayList<AggregationDesc> aggregationDescs = gbDesc.getAggregators(); - for (AggregationDesc agg : aggregationDescs) { - if (!"bloom_filter".equals(agg.getGenericUDAFName())) { - continue; - } - - GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator = - (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); - if (udafBloomFilterEvaluator.hasHintEntries()) - { - return null; // Created using hint, skip it - } - - long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); - if (expectedEntries == -1 || expectedEntries > - pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { - if (sjInfo.getIsHint()) { - throw new SemanticException("Removing hinted semijoin due to lack to stats" + - " or exceeding max bloom filter entries"); - } - // Remove the semijoin optimization branch along with ALL the mappings - // The parent GB2 has all the branches. Collect them and remove them. - for (Node node : gbOp.getChildren()) { - ReduceSinkOperator rsFinal = (ReduceSinkOperator) node; - TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo(). - get(rsFinal).getTsOp(); - if (LOG.isDebugEnabled()) { - LOG.debug("expectedEntries=" + expectedEntries + ". " - + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. " - + "Removing semijoin " - + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); - } - GenTezUtils.removeBranch(rsFinal); - GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts); - } - return null; - } - } - - // At this point, hinted semijoin case has been handled already - // Check if big table is big enough that runtime filtering is - // worth it. - TableScanOperator ts = sjInfo.getTsOp(); - if (ts.getStatistics() != null) { - long numRows = ts.getStatistics().getNumRows(); - if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { - if (sjInfo.getShouldRemove()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " - + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); - } - GenTezUtils.removeBranch(rs); - GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); - } - } - } - return null; - } - } - private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { - // Not needed without semi-join reduction - return; - } - Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put( new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"), - new SemiJoinRemovalIfNoStatsProc()); - Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); + new SemiJoinRemovalProc(true, false)); + SemiJoinRemovalContext ctx = + new SemiJoinRemovalContext(procCtx.parseContext); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); List<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(procCtx.parseContext.getTopOps().values()); GraphWalker ogw = new PreOrderOnceWalker(disp); @@ -1077,6 +1018,218 @@ public class TezCompiler extends TaskCompiler { GraphWalker ogw = new PreOrderOnceWalker(disp); ogw.startWalking(topNodes, null); } + + private class SemiJoinRemovalProc implements NodeProcessor { + + private final boolean removeBasedOnStats; + private final boolean removeRedundant; + + private SemiJoinRemovalProc (boolean removeBasedOnStats, boolean removeRedundant) { + this.removeBasedOnStats = removeBasedOnStats; + this.removeRedundant = removeRedundant; + } + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ReduceSinkOperator rs = (ReduceSinkOperator) nd; + SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx; + ParseContext pCtx = rCtx.parseContext; + SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); + if (sjInfo == null) { + // nothing to do here. + return null; + } + TableScanOperator targetTSOp = sjInfo.getTsOp(); + ExprNodeDesc targetColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(rs).getTsColExpr(); + + // This is a semijoin branch. The stack should look like, + // <Parent Ops>-SEL-GB1-RS1-GB2-RS2 + GroupByOperator gbOp = (GroupByOperator) stack.get(stack.size() - 2); + GroupByDesc gbDesc = gbOp.getConf(); + ArrayList<AggregationDesc> aggregationDescs = gbDesc.getAggregators(); + for (AggregationDesc agg : aggregationDescs) { + if (!isBloomFilterAgg(agg)) { + continue; + } + + GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator = + (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); + if (udafBloomFilterEvaluator.hasHintEntries()) { + return null; // Created using hint, skip it + } + + if (removeBasedOnStats) { + long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); + if (expectedEntries == -1 || expectedEntries > + pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin due to lack to stats" + + " or exceeding max bloom filter entries"); + } + // Remove the semijoin optimization branch along with ALL the mappings + // The parent GB2 has all the branches. Collect them and remove them. + for (Node node : gbOp.getChildren()) { + ReduceSinkOperator rsFinal = (ReduceSinkOperator) node; + TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo(). + get(rsFinal).getTsOp(); + if (LOG.isDebugEnabled()) { + LOG.debug("expectedEntries=" + expectedEntries + ". " + + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. " + + "Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } + GenTezUtils.removeBranch(rsFinal); + GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts); + } + return null; + } + } + } + + if (removeBasedOnStats) { + // At this point, hinted semijoin case has been handled already + // Check if big table is big enough that runtime filtering is + // worth it. + TableScanOperator ts = sjInfo.getTsOp(); + if (ts.getStatistics() != null) { + long numRows = ts.getStatistics().getNumRows(); + if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { + if (sjInfo.getShouldRemove()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); + } + } + } + } + + if (removeRedundant) { + // Look for RS ops above the current semijoin branch + Set<ReduceSinkOperator> rsOps = OperatorUtils.findOperators( + ((Operator<?>) stack.get(stack.size() - 5)).getParentOperators().get(0), + ReduceSinkOperator.class); + for (Operator<?> otherRSOp : rsOps) { + SemiJoinBranchInfo otherSjInfo = pCtx.getRsToSemiJoinBranchInfo().get(otherRSOp); + // First conjunct prevents SJ RS from removing itself + if (otherRSOp != rs && otherSjInfo != null && otherSjInfo.getTsOp() == targetTSOp) { + if (rCtx.opsToRemove.containsKey(otherRSOp)) { + // We found siblings, since we are removing the other operator, no need to remove this one + continue; + } + ExprNodeDesc otherColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(otherRSOp).getTsColExpr(); + if (!otherColExpr.isSame(targetColExpr)) { + // Filter should be on the same column, otherwise we do not proceed + continue; + } + rCtx.opsToRemove.put(rs, targetTSOp); + break; + } + } + } + + return null; + } + } + + private static boolean isBloomFilterAgg(AggregationDesc agg) { + return "bloom_filter".equals(agg.getGenericUDAFName()); + } + + private static class DynamicPruningRemovalRedundantProc implements NodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + AppMasterEventOperator event = (AppMasterEventOperator) nd; + if (!(event.getConf() instanceof DynamicPruningEventDesc)) { + return null; + } + + SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx; + + DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf(); + TableScanOperator targetTSOp = desc.getTableScan(); + String targetColumnName = desc.getTargetColumnName(); + + // Look for event ops above the current event op branch + Operator<?> op = event.getParentOperators().get(0); + while (op.getChildOperators().size() < 2) { + op = op.getParentOperators().get(0); + } + Set<AppMasterEventOperator> eventOps = OperatorUtils.findOperators( + op, AppMasterEventOperator.class); + for (AppMasterEventOperator otherEvent : eventOps) { + if (!(otherEvent.getConf() instanceof DynamicPruningEventDesc)) { + continue; + } + DynamicPruningEventDesc otherDesc = (DynamicPruningEventDesc) otherEvent.getConf(); + if (otherEvent != event && otherDesc.getTableScan() == targetTSOp && + otherDesc.getTargetColumnName().equals(targetColumnName)) { + if (rCtx.opsToRemove.containsKey(otherEvent)) { + // We found siblings, since we are removing the other operator, no need to remove this one + continue; + } + rCtx.opsToRemove.put(event, targetTSOp); + break; + } + } + + return null; + } + } + + private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx) + throws SemanticException { + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>(); + opRules.put( + new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + + ReduceSinkOperator.getOperatorName() + "%" + + GroupByOperator.getOperatorName() + "%" + + ReduceSinkOperator.getOperatorName() + "%"), + new SemiJoinRemovalProc(false, true)); + opRules.put( + new RuleRegExp("R2", + AppMasterEventOperator.getOperatorName() + "%"), + new DynamicPruningRemovalRedundantProc()); + + // Gather + SemiJoinRemovalContext ctx = + new SemiJoinRemovalContext(procCtx.parseContext); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); + List<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(procCtx.parseContext.getTopOps().values()); + GraphWalker ogw = new PreOrderOnceWalker(disp); + ogw.startWalking(topNodes, null); + + // Remove + for (Map.Entry<Operator<?>, TableScanOperator> p : ctx.opsToRemove.entrySet()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - " + OperatorUtils.getOpNamePretty(p.getValue())); + } + GenTezUtils.removeBranch(p.getKey()); + if (p.getKey() instanceof AppMasterEventOperator) { + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue()); + } else if (p.getKey() instanceof ReduceSinkOperator) { + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue()); + } else { + throw new SemanticException("Unexpected error - type for branch could not be recognized"); + } + } + } + + private class SemiJoinRemovalContext implements NodeProcessorCtx { + private final ParseContext parseContext; + private final Map<Operator<?>, TableScanOperator> opsToRemove; + + private SemiJoinRemovalContext(final ParseContext parseContext) { + this.parseContext = parseContext; + this.opsToRemove = new HashMap<>(); + } + } private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS, ParseContext parseContext, @@ -1166,9 +1319,8 @@ public class TezCompiler extends TaskCompiler { */ private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) || - procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) { + if(!procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) || + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) { // Not needed without semi-join reduction or mapjoins or when semijoins // are enabled for parallel mapjoins. return; @@ -1376,11 +1528,6 @@ public class TezCompiler extends TaskCompiler { private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { - // Not needed without semi-join reduction - return; - } - List<ReduceSinkOperator> semijoinRsToRemove = new ArrayList<ReduceSinkOperator>(); Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); double semijoinReductionThreshold = procCtx.conf.getFloatVar( @@ -1437,11 +1584,6 @@ public class TezCompiler extends TaskCompiler { private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { - // Not needed without semi-join reduction - return; - } - // Stores the Tablescan operators processed to avoid redoing them. Map<TableScanOperator, TableScanOperator> tsOps = new HashMap<>(); Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index dec2d1e..1f533bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -26,6 +26,12 @@ import java.util.Map; import java.util.Set; import java.util.Stack; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -119,14 +125,20 @@ public class SyntheticJoinPredicate extends Transform { private static class SyntheticContext implements NodeProcessorCtx { ParseContext parseContext; + boolean extended; public SyntheticContext(ParseContext pCtx) { parseContext = pCtx; + extended = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED); } public ParseContext getParseContext() { return parseContext; } + + public boolean isExtended() { + return extended; + } } private static class JoinSynthetic implements NodeProcessor { @@ -134,6 +146,8 @@ public class SyntheticJoinPredicate extends Transform { public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { + SyntheticContext sCtx = (SyntheticContext) procCtx; + @SuppressWarnings("unchecked") CommonJoinOperator<JoinDesc> join = (CommonJoinOperator<JoinDesc>) nd; @@ -161,9 +175,6 @@ public class SyntheticJoinPredicate extends Transform { continue; } - if (LOG.isDebugEnabled()) { - LOG.debug("Synthetic predicate: " + srcPos + " --> " + targetPos); - } ReduceSinkOperator target = (ReduceSinkOperator) parents.get(targetPos); List<ExprNodeDesc> sourceKeys = source.getConf().getKeyCols(); List<ExprNodeDesc> targetKeys = target.getConf().getKeyCols(); @@ -175,8 +186,10 @@ public class SyntheticJoinPredicate extends Transform { ExprNodeDesc syntheticExpr = null; for (int i = 0; i < sourceKeys.size(); ++i) { - List<ExprNodeDesc> inArgs = new ArrayList<ExprNodeDesc>(); - inArgs.add(sourceKeys.get(i)); + final ExprNodeDesc sourceKey = sourceKeys.get(i); + + List<ExprNodeDesc> inArgs = new ArrayList<>(); + inArgs.add(sourceKey); ExprNodeDynamicListDesc dynamicExpr = new ExprNodeDynamicListDesc(targetKeys.get(i).getTypeInfo(), target, i); @@ -186,17 +199,36 @@ public class SyntheticJoinPredicate extends Transform { ExprNodeDesc syntheticInExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in") .getGenericUDF(), inArgs); + if (LOG.isDebugEnabled()) { + LOG.debug("Synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + syntheticInExpr + ")"); + } + List<ExprNodeDesc> andArgs = new ArrayList<>(); if (syntheticExpr != null) { - List<ExprNodeDesc> andArgs = new ArrayList<ExprNodeDesc>(); andArgs.add(syntheticExpr); - andArgs.add(syntheticInExpr); + } + andArgs.add(syntheticInExpr); + + if(sCtx.isExtended()) { + // Backtrack + List<ExprNodeDesc> newExprs = createDerivatives(target.getParentOperators().get(0), targetKeys.get(i), sourceKey); + if (!newExprs.isEmpty()) { + if (LOG.isDebugEnabled()) { + for (ExprNodeDesc expr : newExprs) { + LOG.debug("Additional synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + expr + ")"); + } + } + andArgs.addAll(newExprs); + } + } + if (andArgs.size() < 2) { + syntheticExpr = syntheticInExpr; + } else { + // Create AND expression syntheticExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and") .getGenericUDF(), andArgs); - } else { - syntheticExpr = syntheticInExpr; } } @@ -241,6 +273,129 @@ public class SyntheticJoinPredicate extends Transform { } return result; } + + private List<ExprNodeDesc> createDerivatives(final Operator<?> currentOp, + final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException { + List<ExprNodeDesc> resultExprs = new ArrayList<>(); + return createDerivatives(resultExprs, currentOp, currentNode, sourceKey) ? resultExprs : new ArrayList<>(); + } + + private boolean createDerivatives(final List<ExprNodeDesc> resultExprs, final Operator<?> op, + final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException { + // 1. Obtain join operator upstream + Operator<?> currentOp = op; + while (!(currentOp instanceof CommonJoinOperator)) { + if (currentOp.getParentOperators() == null || currentOp.getParentOperators().size() != 1) { + // Cannot backtrack + currentOp = null; + break; + } + if (!(currentOp instanceof FilterOperator) && + !(currentOp instanceof SelectOperator) && + !(currentOp instanceof ReduceSinkOperator) && + !(currentOp instanceof GroupByOperator)) { + // Operator not supported + currentOp = null; + break; + } + // Move the pointer + currentOp = currentOp.getParentOperators().get(0); + } + if (currentOp == null) { + // We did not find any join, we are done + return true; + } + CommonJoinOperator<JoinDesc> joinOp = (CommonJoinOperator) currentOp; + + // 2. Backtrack expression to join output + final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp); + if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) { + // TODO: We can extend to other expression types + // We are done + return true; + } + final String columnRefJoinInput = ((ExprNodeColumnDesc)joinExprNode).getColumn(); + + // 3. Find input position in join for expression obtained + String columnOutputName = null; + for (Map.Entry<String, ExprNodeDesc> e : joinOp.getColumnExprMap().entrySet()) { + if (e.getValue() == joinExprNode) { + columnOutputName = e.getKey(); + break; + } + } + if (columnOutputName == null) { + // Maybe the join is pruning columns, though it should not. + // In any case, we are done + return true; + } + final int srcPos = joinOp.getConf().getReversedExprs().get(columnOutputName); + final int[][] targets = getTargets(joinOp); + final ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(srcPos); + + // 4. Find expression in input RS operator. + final Operator<?> rsOpInput = rsOp.getParentOperators().get(0); + final ExprNodeDesc rsOpInputExprNode = rsOp.getColumnExprMap().get(columnRefJoinInput); + if (rsOpInputExprNode == null) { + // Unexpected, we just bail out and we do not infer additional predicates + return false; + } + int posInRSOpKeys = -1; + for (int i = 0; i < rsOp.getConf().getKeyCols().size(); i++) { + if (rsOpInputExprNode.isSame(rsOp.getConf().getKeyCols().get(i))) { + posInRSOpKeys = i; + break; + } + } + + // 5. If it is part of the key, we can create a new semijoin. + // In addition, we can do the same for siblings + if (posInRSOpKeys >= 0) { + // We pass the tests, we add it to the args for the AND expression + addParentReduceSink(resultExprs, rsOp, posInRSOpKeys, sourceKey); + for (int targetPos: targets[srcPos]) { + if (srcPos == targetPos) { + continue; + } + final ReduceSinkOperator otherRsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(targetPos); + final Operator<?> otherRsOpInput = otherRsOp.getParentOperators().get(0); + // We pass the tests, we add it to the args for the AND expression + addParentReduceSink(resultExprs, otherRsOp, posInRSOpKeys, sourceKey); + // We propagate to operator below + boolean success = createDerivatives( + resultExprs, otherRsOpInput, otherRsOp.getConf().getKeyCols().get(posInRSOpKeys), sourceKey); + if (!success) { + // Something went wrong, bail out + return false; + } + } + } + + // 6. Whether it was part of the key or of the value, if we reach here, we can at least + // continue propagating to operators below + boolean success = createDerivatives( + resultExprs, rsOpInput, rsOpInputExprNode, sourceKey); + if (!success) { + // Something went wrong, bail out + return false; + } + + // 7. We are done, success + return true; + } + + private void addParentReduceSink(final List<ExprNodeDesc> andArgs, final ReduceSinkOperator rsOp, + final int keyIndex, final ExprNodeDesc sourceKey) throws SemanticException { + ExprNodeDynamicListDesc dynamicExpr = + new ExprNodeDynamicListDesc(rsOp.getConf().getKeyCols().get(keyIndex).getTypeInfo(), rsOp, keyIndex); + // Create synthetic IN expression + List<ExprNodeDesc> inArgs = new ArrayList<>(); + inArgs.add(sourceKey); + inArgs.add(dynamicExpr); + ExprNodeDesc newNode = ExprNodeGenericFuncDesc.newInstance( + FunctionRegistry.getFunctionInfo("in").getGenericUDF(), inArgs); + andArgs.add(newNode); + } } private static class Vectors { @@ -285,4 +440,5 @@ public class SyntheticJoinPredicate extends Transform { } } } + } http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q new file mode 100644 index 0000000..910119d --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q @@ -0,0 +1,59 @@ +--! qt:dataset:srcpart +--! qt:dataset:alltypesorc +set hive.compute.query.using.stats=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.stats.autogather=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; +set hive.stats.fetch.column.stats=true; +set hive.cbo.enable=false; +set hive.reorder.nway.joins=false; +set hive.merge.nway.joins=false; + +-- Create Tables +create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC; +create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC; +CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC; + +-- Add Partitions +alter table srcpart_date_n6 add partition (ds = "2008-04-08"); +alter table srcpart_date_n6 add partition (ds = "2008-04-09"); + +alter table srcpart_small_n2 add partition (ds1 = "2008-04-08"); +alter table srcpart_small_n2 add partition (ds1 = "2008-04-09"); + +-- Load +insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc; +insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; +insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; + +set hive.tez.dynamic.semijoin.reduction=false; + +analyze table alltypesorc_int_n0 compute statistics for columns; +analyze table srcpart_date_n6 compute statistics for columns; +analyze table srcpart_small_n2 compute statistics for columns; + +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN +SELECT count(*) + FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6` + JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2` + ON (srcpart_date_n6.key = srcpart_small_n2.key1) + JOIN ( + SELECT * + FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0` + JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2` + ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b + ON (srcpart_small_n2.key1 = b.cstring); + +drop table srcpart_date_n6; +drop table srcpart_small_n2; +drop table alltypesorc_int_n0; http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out new file mode 100644 index 0000000..883bdd7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out @@ -0,0 +1,450 @@ +PREHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesorc_int_n0 +PREHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_date_n6 +PREHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_small_n2 +PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 +PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +PREHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: Lineage: alltypesorc_int_n0.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: alltypesorc_int_n0.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@alltypesorc_int_n0 +PREHOOK: Output: default@alltypesorc_int_n0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@alltypesorc_int_n0 +POSTHOOK: Output: default@alltypesorc_int_n0 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_date_n6 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcpart_date_n6 +PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-09 +PREHOOK: Output: default@srcpart_date_n6 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_date_n6 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcpart_date_n6 +POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_small_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcpart_small_n2 +PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08 +PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09 +PREHOOK: Output: default@srcpart_small_n2 +PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 +PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_small_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcpart_small_n2 +POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08 +POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09 +POSTHOOK: Output: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN +SELECT count(*) + FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6` + JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2` + ON (srcpart_date_n6.key = srcpart_small_n2.key1) + JOIN ( + SELECT * + FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0` + JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2` + ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b + ON (srcpart_small_n2.key1 = b.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(*) + FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6` + JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2` + ON (srcpart_date_n6.key = srcpart_small_n2.key1) + JOIN ( + SELECT * + FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0` + JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2` + ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b + ON (srcpart_small_n2.key1 = b.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 11 <- Reducer 10 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 10 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n6 + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter))) and key is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: srcpart_small_n2 + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Filter Operator + predicate: ((key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and key1 is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: srcpart_small_n2 + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n0 + filterExpr: ((cint = 10) and cstring is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint = 10) and cstring is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col3 + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col1 (type: string) + Statistics: Num rows: 1210 Data size: 105270 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=22) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=22) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table srcpart_date_n6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_date_n6 +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: drop table srcpart_date_n6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6 +PREHOOK: query: drop table srcpart_small_n2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_small_n2 +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: drop table srcpart_small_n2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2 +PREHOOK: query: drop table alltypesorc_int_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc_int_n0 +PREHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: query: drop table alltypesorc_int_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc_int_n0 +POSTHOOK: Output: default@alltypesorc_int_n0 http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/llap/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index f87fe36..6a2ae62 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -5347,8 +5347,8 @@ Stage-0 Stage-1 Map 3 llap File Output Operator [FS_21] - Map Join Operator [MAPJOIN_67] (rows=2 width=404) - Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_66._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Map Join Operator [MAPJOIN_71] (rows=2 width=404) + Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_16] PartitionCols:_col0 @@ -5367,7 +5367,7 @@ Stage-0 predicate:key is not null TableScan [TS_3] (rows=1 width=368) default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map Join Operator [MAPJOIN_66] (rows=1 width=404) + <-Map Join Operator [MAPJOIN_70] (rows=1 width=404) Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_13] @@ -5408,8 +5408,8 @@ Stage-0 Stage-1 Map 3 llap File Output Operator [FS_21] - Map Join Operator [MAPJOIN_67] (rows=2 width=404) - Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_66._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Map Join Operator [MAPJOIN_71] (rows=2 width=404) + Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_16] PartitionCols:_col0 @@ -5428,7 +5428,7 @@ Stage-0 predicate:key is not null TableScan [TS_3] (rows=1 width=368) default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map Join Operator [MAPJOIN_66] (rows=1 width=404) + <-Map Join Operator [MAPJOIN_70] (rows=1 width=404) Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_13] http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 6987a96..74fc2e8 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@l3_monthly_dw_dimplan POSTHOOK: Output: default@l3_monthly_dw_dimplan #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -873,7 +873,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 @@ -915,7 +915,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan 7147200 NULL 27114 7147200 NULL 27114 7147200 NULL 27114 -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -1365,7 +1365,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/perf/tez/query1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out index 579940c..58c422d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query1.q.out @@ -63,10 +63,10 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_159] - Limit [LIM_158] (rows=100 width=860) + File Output Operator [FS_161] + Limit [LIM_160] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_157] (rows=32266667 width=860) + Select Operator [SEL_159] (rows=32266667 width=860) Output:["_col0"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -74,96 +74,96 @@ Stage-0 Output:["_col0"] Filter Operator [FIL_48] (rows=32266667 width=860) predicate:(_col2 > _col7) - Merge Join Operator [MERGEJOIN_132] (rows=96800003 width=860) - Conds:RS_45._col1=RS_156._col1(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_134] (rows=96800003 width=860) + Conds:RS_45._col1=RS_158._col1(Inner),Output:["_col2","_col6","_col7"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_130] (rows=88000001 width=860) - Conds:RS_42._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col6"] + Merge Join Operator [MERGEJOIN_132] (rows=88000001 width=860) + Conds:RS_42._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_150] (rows=80000000 width=860) + Select Operator [SEL_152] (rows=80000000 width=860) Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=80000000 width=860) + Filter Operator [FIL_151] (rows=80000000 width=860) predicate:c_customer_sk is not null TableScan [TS_17] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_129] (rows=34842647 width=77) - Conds:RS_145._col1=RS_148._col0(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_131] (rows=34842647 width=77) + Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_147] (rows=852 width=1910) + Select Operator [SEL_149] (rows=852 width=1910) Output:["_col0"] - Filter Operator [FIL_146] (rows=852 width=1910) + Filter Operator [FIL_148] (rows=852 width=1910) predicate:((s_state = 'NM') and s_store_sk is not null) TableScan [TS_14] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + SHUFFLE [RS_147] PartitionCols:_col1 - Select Operator [SEL_144] (rows=31675133 width=77) + Select Operator [SEL_146] (rows=31675133 width=77) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_143] (rows=31675133 width=77) + Group By Operator [GBY_145] (rows=31675133 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_11] PartitionCols:_col0, _col1 Group By Operator [GBY_10] (rows=63350266 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_128] (rows=63350266 width=77) - Conds:RS_137._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_130] (rows=63350266 width=77) + Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] + SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_135] (rows=57591150 width=77) + Select Operator [SEL_137] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_133] (rows=57591150 width=77) + Filter Operator [FIL_135] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) TableScan [TS_0] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_140] (rows=36524 width=1119) + Select Operator [SEL_142] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_139] (rows=36524 width=1119) + Filter Operator [FIL_141] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_158] PartitionCols:_col1 - Select Operator [SEL_155] (rows=15837566 width=77) + Select Operator [SEL_157] (rows=15837566 width=77) Output:["_col0","_col1"] - Group By Operator [GBY_154] (rows=15837566 width=77) + Group By Operator [GBY_156] (rows=15837566 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_153] (rows=31675133 width=77) + Select Operator [SEL_155] (rows=31675133 width=77) Output:["_col1","_col2"] - Group By Operator [GBY_152] (rows=31675133 width=77) + Group By Operator [GBY_154] (rows=31675133 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col0 Group By Operator [GBY_30] (rows=63350266 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_131] (rows=63350266 width=77) - Conds:RS_138._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_133] (rows=63350266 width=77) + Conds:RS_140._col0=RS_144._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_136] (rows=57591150 width=77) + Select Operator [SEL_138] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_134] (rows=57591150 width=77) + Filter Operator [FIL_136] (rows=57591150 width=77) predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) Please refer to the previous TableScan [TS_0] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] + SHUFFLE [RS_144] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_140] + Please refer to the previous Select Operator [SEL_142]