HIVE-14783 : Bucketing column should be part of the sort key for delete/update operations when sorted dynamic partition optimization (SPDO) is on (Ashutosh Chauhan via Prasanth J)
Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d7e2745e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d7e2745e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d7e2745e Branch: refs/heads/hive-14535 Commit: d7e2745eac5e4d56e8af928e98626194406ca9f2 Parents: 662728f Author: Ashutosh Chauhan <hashut...@apache.org> Authored: Fri Sep 16 18:15:26 2016 -0700 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Mon Sep 19 10:04:26 2016 -0700 ---------------------------------------------------------------------- .../optimizer/SortedDynPartitionOptimizer.java | 3 ++- .../dynpart_sort_optimization_acid.q.out | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d7e2745e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index c743bda..8b4af72 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -440,7 +441,7 @@ public class 
SortedDynPartitionOptimizer extends Transform { int numPartAndBuck = partitionPositions.size(); keyColsPosInVal.addAll(partitionPositions); - if (!bucketColumns.isEmpty()) { + if (!bucketColumns.isEmpty() || writeType == Operation.DELETE || writeType == Operation.UPDATE) { keyColsPosInVal.add(-1); numPartAndBuck += 1; } http://git-wip-us.apache.org/repos/asf/hive/blob/d7e2745e/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index ed3f8e9..1838d6a 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -416,8 +416,8 @@ STAGE PLANS: outputColumnNames: _col0, _col3 Statistics: Num rows: 892 Data size: 2676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) - sort order: ++ + key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: +++ Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 892 Data size: 2676 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -1036,8 +1036,8 @@ STAGE PLANS: outputColumnNames: _col0, _col4 Statistics: Num rows: 1517 Data size: 4551 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '2008-04-08' (type: string), _col4 (type: int), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) - sort order: +++ + key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ 
Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) Statistics: Num rows: 1517 Data size: 4551 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -1146,8 +1146,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) - sort order: +++ + key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ Map-reduce partition columns: _col1 (type: string), _col2 (type: int) Statistics: Num rows: 2979 Data size: 8937 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -1320,8 +1320,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 23 Data size: 2322 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) - sort order: +++ + key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) Statistics: Num rows: 23 Data size: 2322 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), 'bar' (type: string) @@ -1400,8 +1400,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 45 Data size: 4550 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) - sort order: +++ + key expressions: _col3 (type: string), _col4 (type: int), 
'_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ Map-reduce partition columns: _col3 (type: string), _col4 (type: int) Statistics: Num rows: 45 Data size: 4550 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), 'bar' (type: string)