[30/55] [abbrv] hive git commit: HIVE-12325 : Turn hive.map.groupby.sorted on by default (Chetna Chaudhari via Ashutosh Chauhan)

xuefu Wed, 11 Nov 2015 19:59:45 -0800

HIVE-12325 : Turn hive.map.groupby.sorted on by default (Chetna Chaudhari via Ashutosh Chauhan)


Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/58b85acc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/58b85acc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/58b85acc

Branch: refs/heads/spark
Commit: 58b85acca168fd179a0cd39fb735e21a361cb95d
Parents: 678b77b
Author: Chetna Chaudhari <[email protected]>
Authored: Thu Nov 5 20:44:00 2015 -0800
Committer: Ashutosh Chauhan <[email protected]>
Committed: Fri Nov 6 16:04:06 2015 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   6 +-
 .../hive/ql/optimizer/GroupByOptimizer.java     |   8 --
 .../queries/clientpositive/groupby_sort_8.q     |   6 --
 .../clientpositive/groupby_sort_test_1.q        |   1 -
 .../clientpositive/auto_sortmerge_join_10.q.out | 100 +++++++------------
 .../results/clientpositive/bucket_groupby.q.out |  46 +++------
 .../results/clientpositive/groupby_sort_8.q.out |  64 ------------
 .../clientpositive/groupby_sort_test_1.q.out    |  87 ++++++++++------
 .../spark/auto_sortmerge_join_10.q.out          |  45 +++------
 .../tez/auto_sortmerge_join_10.q.out            |  71 ++++++-------
 10 files changed, 155 insertions(+), 279 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7272ea4..7a8517b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -862,14 +862,10 @@ public class HiveConf extends Configuration {
     HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
         "Whether to optimize multi group by query to generate single M/R  job 
plan. If the multi group by query has \n" +
         "common group by keys, it will be optimized to generate single M/R 
job."),
-    HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false,
+    HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true,
         "If the bucketing/sorting properties of the table exactly match the 
grouping key, whether to perform \n" +
         "the group by in the mapper by using BucketizedHiveInputFormat. The 
only downside to this\n" +
         "is that it limits the number of mappers to the number of files."),
-    HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false,
-        "If the bucketing/sorting properties of the table exactly match the 
grouping key, whether to perform \n" +
-        "the group by in the mapper by using BucketizedHiveInputFormat. If the 
test mode is set, the plan\n" +
-        "is not converted, but a query property is set to denote the same."),
     HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", 
false,
         "Whether to enable using Column Position Alias in Group By or Order 
By"),
     
HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 
30,

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
index f758776..fe459f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
@@ -212,11 +212,7 @@ public class GroupByOptimizer implements Transform {
         convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
       }
       else if (optimizeDistincts && !HiveConf.getBoolVar(hiveConf, 
HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
-        // In test mode, dont change the query plan. However, setup a query 
property
         pGraphContext.getQueryProperties().setHasMapGroupBy(true);
-        if (HiveConf.getBoolVar(hiveConf, 
HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
-          return;
-        }
         ReduceSinkOperator reduceSinkOp =
             (ReduceSinkOperator)groupByOp.getChildOperators().get(0);
         GroupByDesc childGroupByDesc =
@@ -518,11 +514,7 @@ public class GroupByOptimizer implements Transform {
     // The operators specified by depth and removed from the tree.
     protected void convertGroupByMapSideSortedGroupBy(
         HiveConf conf, GroupByOperator groupByOp, int depth) {
-      // In test mode, dont change the query plan. However, setup a query 
property
       pGraphContext.getQueryProperties().setHasMapGroupBy(true);
-      if (HiveConf.getBoolVar(conf, 
HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
-        return;
-      }
 
       if (removeChildren(groupByOp, depth)) {
         // Use bucketized hive input format - that makes sure that one mapper 
reads the entire file

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/queries/clientpositive/groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q 
b/ql/src/test/queries/clientpositive/groupby_sort_8.q
index f53295e..f0d3a59 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_8.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q
@@ -18,10 +18,4 @@ EXPLAIN
 select count(distinct key) from T1;
 select count(distinct key) from T1;
 
-set hive.map.groupby.sorted.testmode=true;
--- In testmode, the plan is not changed
-EXPLAIN
-select count(distinct key) from T1;
-select count(distinct key) from T1;
-
 DROP TABLE T1;

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q 
b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
index 4ec138e..70eef33 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_test_1.q
@@ -2,7 +2,6 @@ set hive.enforce.bucketing = true;
 set hive.enforce.sorting = true;
 set hive.exec.reducers.max = 10;
 set hive.map.groupby.sorted=true;
-set hive.map.groupby.sorted.testmode=true;
 
 CREATE TABLE T1(key STRING, val STRING)
 CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out 
b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
index e7f6de3..fb1e656 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
@@ -242,15 +242,19 @@ select count(*) from
   on subq1.key = subq2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-6 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-6
-  Stage-0 depends on stages: Stage-3
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        subq1:a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        subq1:a 
           TableScan
             alias: a
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
@@ -259,43 +263,22 @@ STAGE PLANS:
               Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: count()
-                bucketGroup: true
                 keys: key (type: int)
-                mode: hash
+                mode: final
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: int)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column 
stats: NONE
-          Select Operator
-            expressions: _col0 (type: int)
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column 
stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE 
Column stats: NONE
+                  HashTable Sink Operator
+                    keys:
+                      0 _col0 (type: int)
+                      1 _col0 (type: int)
 
-  Stage: Stage-6
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        subq2:a 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        subq2:a 
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
           TableScan
             alias: a
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
@@ -306,31 +289,22 @@ STAGE PLANS:
                 expressions: key (type: int)
                 outputColumnNames: _col0
                 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE 
Column stats: NONE
-                HashTable Sink Operator
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
                   keys:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Map Join Operator
-              condition map:
-                   Inner Join 0 to 1
-              keys:
-                0 _col0 (type: int)
-                1 _col0 (type: int)
-              Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE 
Column stats: NONE
-              Group By Operator
-                aggregations: count()
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col0 (type: bigint)
+                  Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE 
Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/bucket_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucket_groupby.q.out 
b/ql/src/test/results/clientpositive/bucket_groupby.q.out
index 1b48d3a..1ac5287 100644
--- a/ql/src/test/results/clientpositive/bucket_groupby.q.out
+++ b/ql/src/test/results/clientpositive/bucket_groupby.q.out
@@ -1191,38 +1191,24 @@ STAGE PLANS:
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
                 aggregations: count(1)
-                bucketGroup: true
                 keys: _col0 (type: string), _col1 (type: string)
-                mode: hash
+                mode: final
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string), KEY._col1 (type: string)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col2 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-            Limit
-              Number of rows: 10
-              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 10
+                    Statistics: Num rows: 10 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 10 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/groupby_sort_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_8.q.out 
b/ql/src/test/results/clientpositive/groupby_sort_8.q.out
index 5152385..5d8f513 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_8.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_8.q.out
@@ -101,70 +101,6 @@ POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t1@ds=1
 #### A masked pattern was here ####
 5
-PREHOOK: query: -- In testmode, the plan is not changed
-EXPLAIN
-select count(distinct key) from T1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- In testmode, the plan is not changed
-EXPLAIN
-select count(distinct key) from T1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: t1
-            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column 
stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: key
-              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
-              Group By Operator
-                aggregations: count(DISTINCT key)
-                keys: key (type: string)
-                mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(DISTINCT KEY._col0:0._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.TextInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select count(distinct key) from T1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Input: default@t1@ds=1
-#### A masked pattern was here ####
-POSTHOOK: query: select count(distinct key) from T1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Input: default@t1@ds=1
-#### A masked pattern was here ####
-5
 PREHOOK: query: DROP TABLE T1
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@t1

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out 
b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
index 8c1765d..dfe0ff1 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out
@@ -50,8 +50,13 @@ SELECT key, count(1) FROM T1 GROUP BY key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-1
@@ -67,34 +72,30 @@ STAGE PLANS:
               Group By Operator
                 aggregations: count(1)
                 keys: _col0 (type: string)
-                mode: hash
+                mode: final
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col1 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) 
(type: int)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.outputtbl1
+                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), 
UDFToInteger(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
 
   Stage: Stage-0
     Move Operator
@@ -109,3 +110,33 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.outputtbl1
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.outputtbl1
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
index ee9f448..17d20cb 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out
@@ -206,8 +206,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -220,43 +218,28 @@ STAGE PLANS:
                     Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
-                      bucketGroup: true
                       keys: key (type: int)
-                      mode: hash
+                      mode: final
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: bigint)
-        Reducer 2 
+                      Statistics: Num rows: 1 Data size: 7 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 7 Basic stats: 
COMPLETE Column stats: NONE
+                        Spark HashTable Sink Operator
+                          keys:
+                            0 _col0 (type: int)
+                            1 _col0 (type: int)
             Local Work:
               Map Reduce Local Work
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE 
Column stats: NONE
-                  Spark HashTable Sink Operator
-                    keys:
-                      0 _col0 (type: int)
-                      1 _col0 (type: int)
 
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 3 <- Map 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: a
@@ -275,7 +258,7 @@ STAGE PLANS:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         input vertices:
-                          0 Reducer 2
+                          0 Map 1
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -288,7 +271,7 @@ STAGE PLANS:
                             value expressions: _col0 (type: bigint)
             Local Work:
               Map Reduce Local Work
-        Reducer 4 
+        Reducer 3 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)

http://git-wip-us.apache.org/repos/asf/hive/blob/58b85acc/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out 
b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
index 0d22ea7..98e099c 100644
--- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out
@@ -245,8 +245,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -259,18 +259,34 @@ STAGE PLANS:
                     Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
-                      bucketGroup: true
                       keys: key (type: int)
-                      mode: hash
+                      mode: final
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: bigint)
-        Map 4 
+                      Statistics: Num rows: 1 Data size: 7 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 7 Basic stats: 
COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col0 (type: int)
+                            1 _col0 (type: int)
+                          input vertices:
+                            1 Map 3
+                          Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
+                          HybridGraceHashJoin: true
+                          Group By Operator
+                            aggregations: count()
+                            mode: hash
+                            outputColumnNames: _col0
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order: 
+                              Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: bigint)
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: a
@@ -291,37 +307,6 @@ STAGE PLANS:
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE 
Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 _col0 (type: int)
-                      1 _col0 (type: int)
-                    input vertices:
-                      1 Map 4
-                    Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
-                    HybridGraceHashJoin: true
-                    Group By Operator
-                      aggregations: count()
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: bigint)
-        Reducer 3 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE

[30/55] [abbrv] hive git commit: HIVE-12325 : Turn hive.map.groupby.sorted on by default (Chetna Chaudhari via Ashutosh Chauhan)

Reply via email to