[03/34] hive git commit: HIVE-14726 : delete statement fails when spdo is on (Ashutosh Chauhan via Jesus Camacho Rodriguez)

sershe Mon, 19 Sep 2016 15:42:18 -0700

HIVE-14726 : delete statement fails when spdo is on (Ashutosh Chauhan via Jesus Camacho Rodriguez)


Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0a6d30b3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0a6d30b3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0a6d30b3

Branch: refs/heads/hive-14535
Commit: 0a6d30b3e89d97028b3cd4174ec92e1f5a56d49f
Parents: 7c7fa75
Author: Ashutosh Chauhan <[email protected]>
Authored: Thu Sep 8 16:16:37 2016 -0700
Committer: Ashutosh Chauhan <[email protected]>
Committed: Tue Sep 13 07:42:09 2016 -0700

----------------------------------------------------------------------
 .../optimizer/SortedDynPartitionOptimizer.java  |   6 +-
 .../dynpart_sort_optimization_acid.q            |  13 +
 .../dynpart_sort_optimization_acid.q.out        | 283 ++++++++++++++++---
 3 files changed, 260 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0a6d30b3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index febd446..c743bda 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -184,6 +184,7 @@ public class SortedDynPartitionOptimizer extends Transform {
           destTable.getCols());
       List<Integer> sortPositions = null;
       List<Integer> sortOrder = null;
+      ArrayList<ExprNodeDesc> bucketColumns;
       if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE ||
           fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) {
         // When doing updates and deletes we always want to sort on the rowid because the ACID
@@ -191,6 +192,7 @@ public class SortedDynPartitionOptimizer extends Transform {
         // ignore whatever comes from the table and enforce this sort order instead.
         sortPositions = Arrays.asList(0);
         sortOrder = Arrays.asList(1); // 1 means asc, could really use enum here in the thrift if
+        bucketColumns = new ArrayList<>(); // Bucketing column is already present in ROW__ID, which is specially handled in ReduceSink
       } else {
         if (!destTable.getSortCols().isEmpty()) {
           // Sort columns specified by table
@@ -202,6 +204,8 @@ public class SortedDynPartitionOptimizer extends Transform {
           sortOrder = Lists.newArrayList();
           inferSortPositions(fsParent, sortPositions, sortOrder);
         }
+        List<ColumnInfo> colInfos = fsParent.getSchema().getSignature();
+        bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos);
       }
       List<Integer> sortNullOrder = new ArrayList<Integer>();
       for (int order : sortOrder) {
@@ -212,8 +216,6 @@ public class SortedDynPartitionOptimizer extends Transform {
       for (int i : sortOrder) LOG.debug("sort order " + i);
       for (int i : sortNullOrder) LOG.debug("sort null order " + i);
       List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema());
-      List<ColumnInfo> colInfos = fsParent.getSchema().getSignature();
-      ArrayList<ExprNodeDesc> bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos);
 
       // update file sink descriptor
       fsOp.getConf().setMultiFileSpray(false);

http://git-wip-us.apache.org/repos/asf/hive/blob/0a6d30b3/ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q
index 5e4e0f7..88ff1e1 100644
--- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q
+++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q
@@ -70,6 +70,12 @@ select count(*) from acid where ds='2008-04-08' and hr>=11;
 delete from acid where key = 'foo' and ds='2008-04-08' and hr=11;
 select count(*) from acid where ds='2008-04-08' and hr=11;
 
+-- test with bucketing column not in select list
+explain
+delete from acid where value = 'bar';
+delete from acid where value = 'bar';
+select count(*) from acid;
+
 set hive.optimize.sort.dynamic.partition=true;
 
 -- 2 level partition, sorted dynamic partition enabled
@@ -92,6 +98,13 @@ select count(*) from acid where ds='2008-04-08' and hr>=11;
 delete from acid where key = 'foo' and ds='2008-04-08' and hr=11;
 select count(*) from acid where ds='2008-04-08' and hr=11;
 
+-- test with bucketing column not in select list
+explain
+delete from acid where value = 'bar';
+delete from acid where value = 'bar';
+select count(*) from acid;
+
+
 set hive.optimize.sort.dynamic.partition=true;
 set hive.optimize.constant.propagation=false;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0a6d30b3/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index 9b7b4d2..ed3f8e9 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -80,27 +80,27 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: acid
-            Statistics: Num rows: 1725 Data size: 5177 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1720 Data size: 5162 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (key = 'foo') (type: boolean)
-              Statistics: Num rows: 862 Data size: 2586 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 860 Data size: 2581 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                 outputColumnNames: _col0
-                Statistics: Num rows: 862 Data size: 2586 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 860 Data size: 2581 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
                   Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                  Statistics: Num rows: 862 Data size: 2586 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 860 Data size: 2581 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), '2008-04-08' (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 862 Data size: 2586 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 860 Data size: 2581 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 862 Data size: 2586 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 860 Data size: 2581 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -158,28 +158,28 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: acid
-            Statistics: Num rows: 1945 Data size: 5835 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1940 Data size: 5820 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (key = 'foo') (type: boolean)
-              Statistics: Num rows: 972 Data size: 2916 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 970 Data size: 2910 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
                 outputColumnNames: _col0, _col3
-                Statistics: Num rows: 972 Data size: 2916 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 970 Data size: 2910 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
                   Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                  Statistics: Num rows: 972 Data size: 2916 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 970 Data size: 2910 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col3 (type: string)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), VALUE._col1 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 972 Data size: 2916 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 970 Data size: 2910 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 972 Data size: 2916 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 970 Data size: 2910 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -416,14 +416,14 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col3
                 Statistics: Num rows: 892 Data size: 2676 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), '_bucket_number' 
(type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                  sort order: +++
+                  key expressions: _col3 (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                  sort order: ++
                   Map-reduce partition columns: _col3 (type: string)
                   Statistics: Num rows: 892 Data size: 2676 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: 
string)
-          outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), KEY._col3 (type: string)
+          outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 892 Data size: 2676 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
@@ -583,27 +583,27 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: acid
-            Statistics: Num rows: 1547 Data size: 4642 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1548 Data size: 4644 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (key = 'foo') (type: boolean)
-              Statistics: Num rows: 773 Data size: 2319 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 774 Data size: 2322 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                 outputColumnNames: _col0
-                Statistics: Num rows: 773 Data size: 2319 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 774 Data size: 2322 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
                   Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                  Statistics: Num rows: 773 Data size: 2319 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 774 Data size: 2322 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 773 Data size: 2319 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 774 Data size: 2322 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 773 Data size: 2319 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 774 Data size: 2322 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -662,28 +662,28 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: acid
-            Statistics: Num rows: 3032 Data size: 9099 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 3033 Data size: 9100 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: (key = 'foo') (type: boolean)
-              Statistics: Num rows: 1516 Data size: 4549 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 1516 Data size: 4548 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
                 outputColumnNames: _col0, _col4
-                Statistics: Num rows: 1516 Data size: 4549 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 1516 Data size: 4548 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
                   sort order: +
                   Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                  Statistics: Num rows: 1516 Data size: 4549 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1516 Data size: 4548 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col4 (type: int)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 1516 Data size: 4549 Basic stats: COMPLETE 
Column stats: NONE
+          Statistics: Num rows: 1516 Data size: 4548 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1516 Data size: 4549 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 1516 Data size: 4548 Basic stats: COMPLETE 
Column stats: NONE
             table:
                 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -754,6 +754,108 @@ POSTHOOK: Input: default@acid
 POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
+PREHOOK: query: -- test with bucketing column not in select list
+explain
+delete from acid where value = 'bar'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test with bucketing column not in select list
+explain
+delete from acid where value = 'bar'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid
+            Statistics: Num rows: 5958 Data size: 17874 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (value = 'bar') (type: boolean)
+              Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr 
(type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2979 Data size: 8937 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                  sort order: +
+                  Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+                  Statistics: Num rows: 2979 Data size: 8937 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col2 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: 
string), VALUE._col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                name: default.acid
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+            hr 
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.acid
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: delete from acid where value = 'bar'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=12
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=12
+PREHOOK: Output: default@acid@ds=2008-04-08/hr=11
+PREHOOK: Output: default@acid@ds=2008-04-08/hr=12
+PREHOOK: Output: default@acid@ds=2008-04-09/hr=11
+PREHOOK: Output: default@acid@ds=2008-04-09/hr=12
+POSTHOOK: query: delete from acid where value = 'bar'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12
+PREHOOK: query: select count(*) from acid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=12
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+2000
 PREHOOK: query: -- 2 level partition, sorted dynamic partition enabled
 drop table acid
 PREHOOK: type: DROPTABLE
@@ -934,14 +1036,14 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col4
                 Statistics: Num rows: 1517 Data size: 4551 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: '2008-04-08' (type: string), _col4 (type: 
int), '_bucket_number' (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                  sort order: ++++
+                  key expressions: '2008-04-08' (type: string), _col4 (type: 
int), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                  sort order: +++
                   Map-reduce partition columns: '2008-04-08' (type: string), 
_col4 (type: int)
                   Statistics: Num rows: 1517 Data size: 4551 Basic stats: 
COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), 
KEY.'_bucket_number' (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
'_bucket_number'
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 
'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
           Statistics: Num rows: 1517 Data size: 4551 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
@@ -1016,6 +1118,107 @@ POSTHOOK: Input: default@acid
 POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
+PREHOOK: query: -- test with bucketing column not in select list
+explain
+delete from acid where value = 'bar'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test with bucketing column not in select list
+explain
+delete from acid where value = 'bar'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid
+            Statistics: Num rows: 5958 Data size: 17875 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (value = 'bar') (type: boolean)
+              Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: ROW__ID (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr 
(type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 2979 Data size: 8937 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string), _col2 (type: int), 
_col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                  sort order: +++
+                  Map-reduce partition columns: _col1 (type: string), _col2 
(type: int)
+                  Statistics: Num rows: 2979 Data size: 8937 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: 
string), KEY._col2 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                name: default.acid
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+            hr 
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.acid
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: delete from acid where value = 'bar'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=12
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=12
+PREHOOK: Output: default@acid@ds=2008-04-08/hr=11
+PREHOOK: Output: default@acid@ds=2008-04-08/hr=12
+PREHOOK: Output: default@acid@ds=2008-04-09/hr=11
+PREHOOK: Output: default@acid@ds=2008-04-09/hr=12
+POSTHOOK: query: delete from acid where value = 'bar'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12
+PREHOOK: query: select count(*) from acid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-08/hr=12
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=11
+PREHOOK: Input: default@acid@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+2000
 PREHOOK: query: -- 2 level partition, sorted dynamic partition enabled, 
constant propagation disabled
 drop table acid
 PREHOOK: type: DROPTABLE
@@ -1117,15 +1320,15 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col3, _col4
                 Statistics: Num rows: 23 Data size: 2322 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), _col4 (type: int), 
'_bucket_number' (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                  sort order: ++++
+                  key expressions: _col3 (type: string), _col4 (type: int), 
_col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                  sort order: +++
                   Map-reduce partition columns: _col3 (type: string), _col4 
(type: int)
                   Statistics: Num rows: 23 Data size: 2322 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string), 'bar' (type: string)
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int), KEY.'_bucket_number' (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
'_bucket_number'
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
           Statistics: Num rows: 23 Data size: 2322 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false
@@ -1197,15 +1400,15 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col3, _col4
                 Statistics: Num rows: 45 Data size: 4550 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col3 (type: string), _col4 (type: int), 
'_bucket_number' (type: string), _col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                  sort order: ++++
+                  key expressions: _col3 (type: string), _col4 (type: int), 
_col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                  sort order: +++
                   Map-reduce partition columns: _col3 (type: string), _col4 
(type: int)
                   Statistics: Num rows: 45 Data size: 4550 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string), 'bar' (type: string)
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int), KEY.'_bucket_number' (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
'_bucket_number'
+          expressions: KEY._col0 (type: 
struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: 
string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: 
int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
           Statistics: Num rows: 45 Data size: 4550 Basic stats: COMPLETE 
Column stats: NONE
           File Output Operator
             compressed: false

[03/34] hive git commit: HIVE-14726 : delete statement fails when spdo is on (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Reply via email to