[08/10] hive git commit: HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)

jdere Wed, 04 May 2016 00:22:26 -0700

HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87299662
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87299662
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87299662

Branch: refs/heads/llap
Commit: 8729966296a041b7ea952ba67f148d2c48c27749
Parents: 70fe310
Author: Eugene Koifman <[email protected]>
Authored: Tue May 3 17:11:47 2016 -0700
Committer: Eugene Koifman <[email protected]>
Committed: Tue May 3 17:11:47 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   1 -
 .../dynpart_sort_optimization_acid.q.out        | 120 +++++++++++++++----
 2 files changed, 100 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 06db7f9..2983d38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7030,7 +7030,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true);
     conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1);
     conf.set(AcidUtils.CONF_ACID_KEY, "true");
-    conf.setBoolVar(ConfVars.HIVEOPTSORTDYNAMICPARTITION, false);
 
     if (table.getNumBuckets() < 1) {
       throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName());

http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index eca29df..62399e3 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -380,8 +380,9 @@ POSTHOOK: query: explain update acid set value = 'bar' 
where key = 'foo' and ds
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -397,12 +398,31 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
-                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   value expressions: _col3 (type: string)
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), VALUE._col2 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3
+          expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col2 (type: 
string)
+          outputColumnNames: _col0, _col3
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string), '_bucket_number' (type: 
string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+              sort order: +++
+              Map-reduce partition columns: _col3 (type: string)
+              value expressions: 'foo' (type: string), 'bar' (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), 
KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
           File Output Operator
             compressed: false
             table:
@@ -423,7 +443,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acid
 
-  Stage: Stage-2
+  Stage: Stage-3
     Stats-Aggr Operator
 
 PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in 
('2008-04-08')
@@ -875,8 +895,9 @@ POSTHOOK: query: explain update acid set value = 'bar' 
where key = 'foo' and ds=
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -892,12 +913,31 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
-                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   value expressions: _col4 (type: int)
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col3 (type: int)
+          outputColumnNames: _col0, _col4
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '2008-04-08' (type: string), _col4 (type: int), 
'_bucket_number' (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+              sort order: ++++
+              Map-reduce partition columns: '2008-04-08' (type: string), _col4 
(type: int)
+              value expressions: 'foo' (type: string), 'bar' (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
'_bucket_number'
           File Output Operator
             compressed: false
             table:
@@ -919,7 +959,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acid
 
-  Stage: Stage-2
+  Stage: Stage-3
     Stats-Aggr Operator
 
 PREHOOK: query: update acid set value = 'bar' where key = 'foo' and 
ds='2008-04-08' and hr>=11
@@ -1053,8 +1093,9 @@ POSTHOOK: query: explain update acid set value = 'bar' 
where key = 'foo' and ds=
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1070,7 +1111,6 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
-                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   value expressions: _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: int)
       Reduce Operator Tree:
         Select Operator
@@ -1079,6 +1119,26 @@ STAGE PLANS:
           File Output Operator
             compressed: false
             table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string), _col4 (type: int), 
'_bucket_number' (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+              sort order: ++++
+              Map-reduce partition columns: _col3 (type: string), _col4 (type: 
int)
+              value expressions: _col1 (type: string), _col2 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
'_bucket_number'
+          File Output Operator
+            compressed: false
+            table:
                 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1097,7 +1157,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acid
 
-  Stage: Stage-2
+  Stage: Stage-3
     Stats-Aggr Operator
 
 PREHOOK: query: update acid set value = 'bar' where key = 'foo' and 
ds='2008-04-08' and hr=11
@@ -1127,8 +1187,9 @@ POSTHOOK: query: explain update acid set value = 'bar' 
where key = 'foo' and ds=
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1144,7 +1205,6 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
-                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
                   value expressions: _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: int)
       Reduce Operator Tree:
         Select Operator
@@ -1153,6 +1213,26 @@ STAGE PLANS:
           File Output Operator
             compressed: false
             table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string), _col4 (type: int), 
'_bucket_number' (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+              sort order: ++++
+              Map-reduce partition columns: _col3 (type: string), _col4 (type: 
int)
+              value expressions: _col1 (type: string), _col2 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int), KEY.'_bucket_number' (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
'_bucket_number'
+          File Output Operator
+            compressed: false
+            table:
                 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1171,7 +1251,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.acid
 
-  Stage: Stage-2
+  Stage: Stage-3
     Stats-Aggr Operator
 
 PREHOOK: query: update acid set value = 'bar' where key = 'foo' and 
ds='2008-04-08' and hr>=11

[08/10] hive git commit: HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)

Reply via email to