HIVE-17308: Improvement in join cardinality estimation (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff30a1eb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff30a1eb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff30a1eb Branch: refs/heads/master Commit: ff30a1ebf6a470ce66d7bc7cb8ed452ac956b347 Parents: 2dd9844 Author: Vineet Garg <vg...@apache.com> Authored: Tue Aug 22 11:24:44 2017 -0700 Committer: Vineet Garg <vg...@apache.com> Committed: Tue Aug 22 11:24:44 2017 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../calcite/HiveConfPlannerContext.java | 34 ++ .../optimizer/calcite/HivePlannerContext.java | 7 +- .../calcite/stats/HiveRelMdSelectivity.java | 33 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 4 +- .../calcite/TestCBORuleFiredOnlyOnce.java | 2 +- .../clientpositive/annotate_stats_join.q.out | 32 +- .../clientpositive/join_alt_syntax.q.out | 272 +++++---- .../clientpositive/join_cond_pushdown_2.q.out | 112 ++-- .../clientpositive/join_cond_pushdown_4.q.out | 112 ++-- .../llap/correlationoptimizer1.q.out | 40 +- .../llap/dynamic_semijoin_reduction.q.out | 4 +- .../llap/dynamic_semijoin_user_level.q.out | 4 +- .../clientpositive/llap/explainuser_1.q.out | 10 +- .../clientpositive/llap/explainuser_2.q.out | 2 +- .../llap/join_max_hashtable.q.out | 12 +- .../results/clientpositive/llap/skewjoin.q.out | 2 +- .../clientpositive/llap/subquery_exists.q.out | 4 +- .../clientpositive/llap/subquery_in.q.out | 112 ++-- .../clientpositive/llap/subquery_multi.q.out | 82 +-- .../clientpositive/llap/subquery_notin.q.out | 86 +-- .../clientpositive/llap/subquery_scalar.q.out | 8 +- .../clientpositive/llap/subquery_select.q.out | 36 +- .../clientpositive/llap/subquery_views.q.out | 22 +- .../results/clientpositive/perf/query17.q.out | 236 ++++--- .../results/clientpositive/perf/query24.q.out | 124 ++-- .../results/clientpositive/perf/query25.q.out | 232 ++++--- .../results/clientpositive/perf/query29.q.out | 232 +++---- .../results/clientpositive/perf/query50.q.out | 70 +-- .../results/clientpositive/perf/query54.q.out | 394 ++++++------ .../results/clientpositive/perf/query64.q.out | 610 +++++++++---------- .../results/clientpositive/perf/query72.q.out | 250 ++++---- .../results/clientpositive/perf/query85.q.out | 136 ++--- .../spark/annotate_stats_join.q.out | 32 +- .../clientpositive/spark/join_alt_syntax.q.out | 220 +++---- .../spark/join_cond_pushdown_2.q.out | 92 +-- .../spark/join_cond_pushdown_4.q.out | 92 +-- .../spark/spark_explainuser_1.q.out | 10 +- 38 files changed, 1922 insertions(+), 1842 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index c3fc1a5..37eb8fe 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1751,7 +1751,7 @@ public class HiveConf extends Configuration { "uses column statistics to estimate the number of rows flowing out of it and hence the data size.\n" + "In the absence of column statistics, this factor determines the amount of rows that flows out\n" + "of JOIN operator."), - HIVE_STATS_CORRELATED_MULTI_KEY_JOINS("hive.stats.correlated.multi.key.joins", false, + HIVE_STATS_CORRELATED_MULTI_KEY_JOINS("hive.stats.correlated.multi.key.joins", true, "When estimating output rows for a join involving multiple columns, the default behavior assumes" + "the columns are independent. Setting this flag to true will cause the estimator to assume" + "the columns are correlated."), http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java new file mode 100644 index 0000000..756b671 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfPlannerContext.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.rel.RelNode; + +import java.util.Set; + +public class HiveConfPlannerContext{ + + private boolean isCorrelatedColumns; + + + public HiveConfPlannerContext(boolean isCorrelatedColumns) { + this.isCorrelatedColumns = isCorrelatedColumns; + } + + public boolean getIsCorrelatedColumns() { return isCorrelatedColumns;} +} http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java index bdf9955..56e2f88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java @@ -31,10 +31,11 @@ public class HivePlannerContext implements Context { private HiveRulesRegistry registry; private CalciteConnectionConfig calciteConfig; private SubqueryConf subqueryConfig; + private HiveConfPlannerContext isCorrelatedColumns; public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, CalciteConnectionConfig calciteConfig, Set<RelNode> corrScalarRexSQWithAgg, - Set<RelNode> scalarAggNoGbyWindowing) { + Set<RelNode> scalarAggNoGbyWindowing, HiveConfPlannerContext isCorrelatedColumns) { this.algoConfig = algoConfig; this.registry = registry; this.calciteConfig = calciteConfig; @@ -42,6 +43,7 @@ public class HivePlannerContext implements Context { // this is computed in CalcitePlanner while planning and is later required by subuery remove rule // hence this is passed using HivePlannerContext this.subqueryConfig = new SubqueryConf(corrScalarRexSQWithAgg, scalarAggNoGbyWindowing); + this.isCorrelatedColumns = isCorrelatedColumns; } public <T> T unwrap(Class<T> clazz) { @@ -57,6 +59,9 @@ public class HivePlannerContext implements Context { if(clazz.isInstance(subqueryConfig)) { return clazz.cast(subqueryConfig); } + if(clazz.isInstance(isCorrelatedColumns)) { + return clazz.cast(isCorrelatedColumns); + } return null; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 046f51b..00996a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -37,6 +37,7 @@ import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -121,23 +122,30 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { // NDV of the join can not exceed the cardinality of cross join. List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements(); int noOfPE = peLst.size(); - double ndvCrossProduct = 1; + double ndvEstimate = 1; if (noOfPE > 0) { - ndvCrossProduct = exponentialBackoff(peLst, colStatMap); + boolean isCorrelatedColumns = j.getCluster().getPlanner().getContext(). + unwrap(HiveConfPlannerContext.class).getIsCorrelatedColumns(); + if (noOfPE > 1 && isCorrelatedColumns ){ + ndvEstimate = maxNdvForCorrelatedColumns(peLst, colStatMap); + } + else { + ndvEstimate = exponentialBackoff(peLst, colStatMap); + } if (j instanceof SemiJoin) { - ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()), - ndvCrossProduct); + ndvEstimate = Math.min(mq.getRowCount(j.getLeft()), + ndvEstimate); }else if (j instanceof HiveJoin){ - ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()) - * mq.getRowCount(j.getRight()), ndvCrossProduct); + ndvEstimate = Math.min(mq.getRowCount(j.getLeft()) + * mq.getRowCount(j.getRight()), ndvEstimate); } else { throw new RuntimeException("Unexpected Join type: " + j.getClass().getName()); } } // 4. Join Selectivity = 1/NDV - return (1 / ndvCrossProduct); + return (1 / ndvEstimate); } // 3.2 if conjunctive predicate elements are more than one, then walk @@ -185,6 +193,17 @@ public class HiveRelMdSelectivity extends RelMdSelectivity { return ndvCrossProduct; } + // max ndv across all column references from both sides of table + protected double maxNdvForCorrelatedColumns(List<JoinLeafPredicateInfo> peLst, + ImmutableMap<Integer, Double> colStatMap) { + int noOfPE = peLst.size(); + List<Double> ndvs = new ArrayList<Double>(noOfPE); + for (int i = 0; i < noOfPE; i++) { + ndvs.add(getMaxNDVForJoinSelectivity(peLst.get(i), colStatMap)); + } + return Collections.max(ndvs); + } + /* * a) Order predciates based on ndv in reverse order. b) ndvCrossProduct = * ndv(pe0) * ndv(pe1) ^(1/2) * ndv(pe2) ^(1/4) * ndv(pe3) ^(1/8) ... http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d6695cc..c9cb298 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -148,6 +148,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; @@ -1334,8 +1335,9 @@ public class CalcitePlanner extends SemanticAnalyzer { CalciteConnectionProperty.MATERIALIZATIONS_ENABLED.camelName(), Boolean.FALSE.toString()); CalciteConnectionConfig calciteConfig = new CalciteConnectionConfigImpl(calciteConfigProperties); + boolean isCorrelatedColumns = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_CORRELATED_MULTI_KEY_JOINS); HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig, - corrScalarRexSQWithAgg, scalarAggNoGbyNoWin); + corrScalarRexSQWithAgg, scalarAggNoGbyNoWin, new HiveConfPlannerContext(isCorrelatedColumns)); RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext); final RexBuilder rexBuilder = cluster.getRexBuilder(); final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder); http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java index 884e034..94d6693 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java @@ -62,7 +62,7 @@ public class TestCBORuleFiredOnlyOnce { // Create rules registry to not trigger a rule more than once HiveRulesRegistry registry = new HiveRulesRegistry(); HivePlannerContext context = new HivePlannerContext(null, registry, null, - null, null); + null, null, null); HepPlanner planner = new HepPlanner(programBuilder.build(), context); // Cluster http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/annotate_stats_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index d24324a..a7e73a0 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -236,10 +236,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -302,10 +302,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -368,10 +368,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -622,10 +622,10 @@ STAGE PLANS: 1 _col1 (type: string), _col0 (type: int) 2 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -682,10 +682,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -753,10 +753,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -813,10 +813,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -873,10 +873,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 54 Data size: 9506 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 54 Data size: 9506 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/join_alt_syntax.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index 1c08e6a..5f9a7ce 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -356,9 +356,9 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -375,32 +375,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: p2 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -415,11 +417,58 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -440,10 +489,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -451,53 +500,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -516,9 +518,9 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -535,32 +537,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: p2 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -575,11 +579,58 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -600,10 +651,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) + 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -611,53 +662,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index 6b03800..374aefb 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -142,30 +142,30 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -177,18 +177,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -203,53 +203,58 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -273,15 +278,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out index 1408dad..0270744 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out @@ -146,30 +146,30 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -181,18 +181,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -207,53 +207,58 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan - alias: p3 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) TableScan alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -277,15 +282,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out index 1e4e084..b138a2d 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out @@ -1375,18 +1375,18 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 25 Data size: 2241 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -1396,10 +1396,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1506,18 +1506,18 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 25 Data size: 2241 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -1527,10 +1527,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1219 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3700 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2796,18 +2796,18 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2817,11 +2817,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash @@ -2944,18 +2944,18 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 3440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -2965,11 +2965,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1) mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 478b082..3bd35bf 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -944,7 +944,7 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash @@ -1090,7 +1090,7 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index a73faeb..3effe59 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -486,7 +486,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=1 width=8) + Merge Join Operator [MERGEJOIN_19] (rows=129 width=8) Conds:RS_6._col0, _col1=RS_7._col0, _col1(Inner) <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] @@ -551,7 +551,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_39] (rows=1 width=8) + Merge Join Operator [MERGEJOIN_39] (rows=129 width=8) Conds:RS_6._col0, _col1=RS_7._col0, _col1(Inner) <-Map 4 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/llap/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index b36fe2a..22d8e95 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1746,11 +1746,11 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=250 width=178) + Select Operator [SEL_17] (rows=183 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=250 width=178) + Filter Operator [FIL_16] (rows=183 width=179) predicate:_col4 is null - Merge Join Operator [MERGEJOIN_21] (rows=250 width=178) + Merge Join Operator [MERGEJOIN_21] (rows=250 width=179) Conds:RS_13._col0, _col1=RS_14._col0, _col1(Left Outer),Output:["_col0","_col1","_col4"] <-Reducer 2 [ONE_TO_ONE_EDGE] llap FORWARD [RS_13] @@ -1821,7 +1821,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_11] - Merge Join Operator [MERGEJOIN_16] (rows=1 width=178) + Merge Join Operator [MERGEJOIN_16] (rows=134 width=178) Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_7] @@ -1871,7 +1871,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_11] - Merge Join Operator [MERGEJOIN_16] (rows=1 width=178) + Merge Join Operator [MERGEJOIN_16] (rows=134 width=178) Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_7] http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/llap/explainuser_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 4f1a4a1..4400aae 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -317,7 +317,7 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_48] (rows=5 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col11)","count(_col21)","count(_col3)"],keys:_col10, _col20, _col2 - Merge Join Operator [MERGEJOIN_97] (rows=940 width=534) + Merge Join Operator [MERGEJOIN_97] (rows=4704 width=534) Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col10","_col11","_col20","_col21"] <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_45]