HIVE-12325 : Turn hive.map.groupby.sorted on by default (Chetna Chaudhari via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/58b85acc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/58b85acc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/58b85acc Branch: refs/heads/spark Commit: 58b85acca168fd179a0cd39fb735e21a361cb95d Parents: 678b77b Author: Chetna Chaudhari <[email protected]> Authored: Thu Nov 5 20:44:00 2015 -0800 Committer: Ashutosh Chauhan <[email protected]> Committed: Fri Nov 6 16:04:06 2015 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 6 +- .../hive/ql/optimizer/GroupByOptimizer.java | 8 -- .../queries/clientpositive/groupby_sort_8.q | 6 -- .../clientpositive/groupby_sort_test_1.q | 1 - .../clientpositive/auto_sortmerge_join_10.q.out | 100 +++++++------------ .../results/clientpositive/bucket_groupby.q.out | 46 +++------ .../results/clientpositive/groupby_sort_8.q.out | 64 ------------ .../clientpositive/groupby_sort_test_1.q.out | 87 ++++++++++------ .../spark/auto_sortmerge_join_10.q.out | 45 +++------ .../tez/auto_sortmerge_join_10.q.out | 71 ++++++------- 10 files changed, 155 insertions(+), 279 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7272ea4..7a8517b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -862,14 +862,10 @@ public class HiveConf extends Configuration { HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + "common group by keys, it will be optimized to generate single M/R job."), - HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false, + HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true, "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + "the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this\n" + "is that it limits the number of mappers to the number of files."), - HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false, - "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + - "the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan\n" + - "is not converted, but a query property is set to denote the same."), HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false, "Whether to enable using Column Position Alias in Group By or Order By"), HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30, http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index f758776..fe459f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -212,11 +212,7 @@ public class GroupByOptimizer implements Transform { convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth); } else if (optimizeDistincts && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { - // In test mode, dont change the query plan. However, setup a query property pGraphContext.getQueryProperties().setHasMapGroupBy(true); - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) { - return; - } ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator)groupByOp.getChildOperators().get(0); GroupByDesc childGroupByDesc = @@ -518,11 +514,7 @@ public class GroupByOptimizer implements Transform { // The operators specified by depth and removed from the tree. protected void convertGroupByMapSideSortedGroupBy( HiveConf conf, GroupByOperator groupByOp, int depth) { - // In test mode, dont change the query plan. However, setup a query property pGraphContext.getQueryProperties().setHasMapGroupBy(true); - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) { - return; - } if (removeChildren(groupByOp, depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/queries/clientpositive/groupby_sort_8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q b/ql/src/test/queries/clientpositive/groupby_sort_8.q index f53295e..f0d3a59 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_8.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q @@ -18,10 +18,4 @@ EXPLAIN select count(distinct key) from T1; select count(distinct key) from T1; -set hive.map.groupby.sorted.testmode=true; --- In testmode, the plan is not changed -EXPLAIN -select count(distinct key) from T1; -select count(distinct key) from T1; - DROP TABLE T1; http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/queries/clientpositive/groupby_sort_test_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q index 4ec138e..70eef33 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q @@ -2,7 +2,6 @@ set hive.enforce.bucketing = true; set hive.enforce.sorting = true; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; -set hive.map.groupby.sorted.testmode=true; CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out index e7f6de3..fb1e656 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out @@ -242,15 +242,19 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-6 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -259,43 +263,22 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - bucketGroup: true keys: key (type: int) - mode: hash + mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - subq2:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq2:a + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -306,31 +289,22 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/bucket_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucket_groupby.q.out b/ql/src/test/results/clientpositive/bucket_groupby.q.out index 1b48d3a..1ac5287 100644 --- a/ql/src/test/results/clientpositive/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -1191,38 +1191,24 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - bucketGroup: true keys: _col0 (type: string), _col1 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/groupby_sort_8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_sort_8.q.out b/ql/src/test/results/clientpositive/groupby_sort_8.q.out index 5152385..5d8f513 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_8.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_8.q.out @@ -101,70 +101,6 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 #### A masked pattern was here #### 5 -PREHOOK: query: -- In testmode, the plan is not changed -EXPLAIN -select count(distinct key) from T1 -PREHOOK: type: QUERY -POSTHOOK: query: -- In testmode, the plan is not changed -EXPLAIN -select count(distinct key) from T1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(distinct key) from T1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t1@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from T1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t1@ds=1 -#### A masked pattern was here #### -5 PREHOOK: query: DROP TABLE T1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@t1 http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index 8c1765d..dfe0ff1 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -50,8 +50,13 @@ SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -67,34 +72,30 @@ STAGE PLANS: Group By Operator aggregations: count(1) keys: _col0 (type: string) - mode: hash + mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### Stage: Stage-0 Move Operator @@ -109,3 +110,33 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out index ee9f448..17d20cb 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -206,8 +206,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -220,43 +218,28 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - bucketGroup: true keys: key (type: int) - mode: hash + mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: a @@ -275,7 +258,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) input vertices: - 0 Reducer 2 + 0 Map 1 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -288,7 +271,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out index 0d22ea7..98e099c 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out @@ -245,8 +245,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -259,18 +259,34 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - bucketGroup: true keys: key (type: int) - mode: hash + mode: final outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Map 4 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: a @@ -291,37 +307,6 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
