hive git commit: Revert "HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed by Rui Li)"

vgarg Wed, 12 Dec 2018 16:38:15 -0800

Repository: hive
Updated Branches:
  refs/heads/master 881e29124 -> a43581b6d



Revert "HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch 
(Vineet Garg, reviewed by Rui Li)"

This reverts commit 8d084d676539b6ba3b9fd46e86505cca4be95b43.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a43581b6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a43581b6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a43581b6

Branch: refs/heads/master
Commit: a43581b6d884a7041930644fb3a580e398bc6951
Parents: 881e291
Author: Vineet Garg <vg...@apache.org>
Authored: Wed Dec 12 16:37:37 2018 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Wed Dec 12 16:37:37 2018 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 -
 .../ReduceSinkDeDuplicationUtils.java           |  32 +--
 .../queries/clientpositive/reducesink_dedup.q   |  12 +-
 .../clientpositive/llap/reducesink_dedup.q.out  | 229 -------------------
 .../clientpositive/reducesink_dedup.q.out       | 202 +---------------
 5 files changed, 14 insertions(+), 462 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index e32fc60..01cad2a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -634,7 +634,6 @@ minillaplocal.query.files=\
   ptf_streaming.q,\
   runtime_stats_merge.q,\
   quotedid_smb.q,\
-  reducesink_dedup.q,\
   resourceplan.q,\
   results_cache_1.q,\
   results_cache_2.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
index 23ec1eb..7ccd4a3 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
@@ -475,25 +475,6 @@ public class ReduceSinkDeDuplicationUtils {
     return 0;
   }
 
-  // Check that in the path between cRS and pRS, there are only Select 
operators
-  // i.e. the sequence must be pRS-SEL*-cRS
-  // ensure SEL does not branch
-  protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, 
ReduceSinkOperator pRS) {
-    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
-    while (parent != pRS) {
-      assert parent.getNumParent() == 1;
-      if (!(parent instanceof SelectOperator)) {
-        return false;
-      }
-      if (parent.getChildOperators().size() > 1) {
-        return false;
-      }
-
-      parent = parent.getParentOperators().get(0);
-    }
-    return true;
-  }
-
   protected static boolean aggressiveDedup(ReduceSinkOperator cRS, 
ReduceSinkOperator pRS,
           ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
     assert cRS.getNumParent() == 1;
@@ -503,8 +484,15 @@ public class ReduceSinkDeDuplicationUtils {
     List<ExprNodeDesc> cKeys = cConf.getKeyCols();
     List<ExprNodeDesc> pKeys = pConf.getKeyCols();
 
-    if (!checkSelectSingleBranchOnly(cRS, pRS)) {
-      return false;
+    // Check that in the path between cRS and pRS, there are only Select 
operators
+    // i.e. the sequence must be pRS-SEL*-cRS
+    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+    while (parent != pRS) {
+      assert parent.getNumParent() == 1;
+      if (!(parent instanceof SelectOperator)) {
+        return false;
+      }
+      parent = parent.getParentOperators().get(0);
     }
 
     // If child keys are null or empty, we bail out
@@ -576,7 +564,7 @@ public class ReduceSinkDeDuplicationUtils {
 
     // Replace pRS with cRS and remove operator sequence from pRS to cRS
     // Recall that the sequence must be pRS-SEL*-cRS
-    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+    parent = cRS.getParentOperators().get(0);
     while (parent != pRS) {
       dedupCtx.addRemovedOperator(parent);
       parent = parent.getParentOperators().get(0);

http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/test/queries/clientpositive/reducesink_dedup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q 
b/ql/src/test/queries/clientpositive/reducesink_dedup.q
index b7f9a01..352a558 100644
--- a/ql/src/test/queries/clientpositive/reducesink_dedup.q
+++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q
@@ -1,13 +1,5 @@
 --! qt:dataset:part
---! qt:dataset:src
-select p_name
+select p_name 
 from (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
-;
-
-create temporary table d1 (key int);
-create temporary table d2 (key int);
-
-explain from (select key from src cluster by key) a
-  insert overwrite table d1 select a.key
-  insert overwrite table d2 select a.key cluster by a.key;
\ No newline at end of file
+;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out 
b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
deleted file mode 100644
index 6dede4c..0000000
--- a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
+++ /dev/null
@@ -1,229 +0,0 @@
-PREHOOK: query: select p_name
-from (select p_name from part distribute by 1 sort by 1) p 
-distribute by 1 sort by 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_name
-from (select p_name from part distribute by 1 sort by 1) p 
-distribute by 1 sort by 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-almond antique burnished rose metallic
-almond antique burnished rose metallic
-almond antique chartreuse lavender yellow
-almond antique salmon chartreuse burlywood
-almond aquamarine burnished black steel
-almond aquamarine pink moccasin thistle
-almond antique violet chocolate turquoise
-almond antique violet turquoise frosted
-almond aquamarine midnight light salmon
-almond aquamarine rose maroon antique
-almond aquamarine sandy cyan gainsboro
-almond antique chartreuse khaki white
-almond antique forest lavender goldenrod
-almond antique metallic orange dim
-almond antique misty red olive
-almond antique olive coral navajo
-almond antique gainsboro frosted violet
-almond antique violet mint lemon
-almond aquamarine floral ivory bisque
-almond aquamarine yellow dodger mint
-almond azure aquamarine papaya violet
-almond antique blue firebrick mint
-almond antique medium spring khaki
-almond antique sky peru orange
-almond aquamarine dodger light gainsboro
-almond azure blanched chiffon midnight
-PREHOOK: query: create temporary table d1 (key int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@d1
-POSTHOOK: query: create temporary table d1 (key int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@d1
-PREHOOK: query: create temporary table d2 (key int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@d2
-POSTHOOK: query: create temporary table d2 (key int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@d2
-PREHOOK: query: explain from (select key from src cluster by key) a
-  insert overwrite table d1 select a.key
-  insert overwrite table d2 select a.key cluster by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@d1
-PREHOOK: Output: default@d2
-POSTHOOK: query: explain from (select key from src cluster by key) a
-  insert overwrite table d1 select a.key
-  insert overwrite table d2 select a.key cluster by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@d1
-POSTHOOK: Output: default@d2
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-    Tez
-#### A masked pattern was here ####
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-        Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: no inputs
-        Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Select Operator
-                  expressions: UDFToInteger(_col0) (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.d1
-                  Select Operator
-                    expressions: _col0 (type: int)
-                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 424 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 424 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
-                Select Operator
-                  expressions: UDFToInteger(_col0) (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.d2
-                  Select Operator
-                    expressions: _col0 (type: int)
-                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      aggregations: compute_stats(key, 'hll')
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 424 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 424 Basic stats: 
COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: compute_stats(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: compute_stats(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: COMPLETE
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-3
-    Dependency Collection
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.d1
-
-  Stage: Stage-4
-    Stats Work
-      Basic Stats Work:
-      Column Stats Desc:
-          Columns: key
-          Column Types: int
-          Table: default.d1
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.d2
-
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
-      Column Stats Desc:
-          Columns: key
-          Column Types: int
-          Table: default.d2
-

http://git-wip-us.apache.org/repos/asf/hive/blob/a43581b6/ql/src/test/results/clientpositive/reducesink_dedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out 
b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
index 2b068ac..b89df52 100644
--- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out
+++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: select p_name
+PREHOOK: query: select p_name 
 from (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
 #### A masked pattern was here ####
-POSTHOOK: query: select p_name
+POSTHOOK: query: select p_name 
 from (select p_name from part distribute by 1 sort by 1) p 
 distribute by 1 sort by 1
 POSTHOOK: type: QUERY
@@ -36,201 +36,3 @@ almond antique medium spring khaki
 almond antique sky peru orange
 almond aquamarine dodger light gainsboro
 almond azure blanched chiffon midnight
-PREHOOK: query: create temporary table d1 (key int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@d1
-POSTHOOK: query: create temporary table d1 (key int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@d1
-PREHOOK: query: create temporary table d2 (key int)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@d2
-POSTHOOK: query: create temporary table d2 (key int)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@d2
-PREHOOK: query: explain from (select key from src cluster by key) a
-  insert overwrite table d1 select a.key
-  insert overwrite table d2 select a.key cluster by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@d1
-PREHOOK: Output: default@d2
-POSTHOOK: query: explain from (select key from src cluster by key) a
-  insert overwrite table d1 select a.key
-  insert overwrite table d2 select a.key cluster by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@d1
-POSTHOOK: Output: default@d2
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
-  Stage-4 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
-  Stage-1 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-2
-
-STAGE PLANS:
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: src
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int)
-            outputColumnNames: _col0
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.d1
-            Select Operator
-              expressions: _col0 (type: int)
-              outputColumnNames: key
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              Group By Operator
-                aggregations: compute_stats(key, 'hll')
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int)
-            outputColumnNames: _col0
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.d2
-            Select Operator
-              expressions: _col0 (type: int)
-              outputColumnNames: key
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              Group By Operator
-                aggregations: compute_stats(key, 'hll')
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-0
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.d1
-
-  Stage: Stage-3
-    Stats Work
-      Basic Stats Work:
-      Column Stats Desc:
-          Columns: key
-          Column Types: int
-          Table: default.d1
-
-  Stage: Stage-4
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: compute_stats(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-5
-    Stats Work
-      Basic Stats Work:
-      Column Stats Desc:
-          Columns: key
-          Column Types: int
-          Table: default.d2
-
-  Stage: Stage-1
-    Move Operator
-      tables:
-          replace: true
-          table:
-              input format: org.apache.hadoop.mapred.TextInputFormat
-              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.d2
-
-  Stage: Stage-6
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col0 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: compute_stats(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE 
Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-

hive git commit: Revert "HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed by Rui Li)"

Reply via email to