HIVE-13646 make hive.optimize.sort.dynamic.partition compatible with ACID tables (Eugene Koifman, reviewed by Wei Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/87299662 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/87299662 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/87299662 Branch: refs/heads/llap Commit: 8729966296a041b7ea952ba67f148d2c48c27749 Parents: 70fe310 Author: Eugene Koifman <[email protected]> Authored: Tue May 3 17:11:47 2016 -0700 Committer: Eugene Koifman <[email protected]> Committed: Tue May 3 17:11:47 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 1 - .../dynpart_sort_optimization_acid.q.out | 120 +++++++++++++++---- 2 files changed, 100 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 06db7f9..2983d38 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7030,7 +7030,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { conf.setBoolVar(ConfVars.HIVEOPTREDUCEDEDUPLICATION, true); conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1); conf.set(AcidUtils.CONF_ACID_KEY, "true"); - conf.setBoolVar(ConfVars.HIVEOPTSORTDYNAMICPARTITION, false); if (table.getNumBuckets() < 1) { throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName()); http://git-wip-us.apache.org/repos/asf/hive/blob/87299662/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index eca29df..62399e3 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -380,8 +380,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -397,12 +398,31 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) value expressions: _col3 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), VALUE._col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col2 (type: string) + outputColumnNames: _col0, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: +++ + Map-reduce partition columns: _col3 (type: string) + value expressions: 'foo' (type: string), 'bar' (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number' File Output Operator compressed: false table: @@ -423,7 +443,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') @@ -875,8 +895,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds= POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -892,12 +913,31 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) value expressions: _col4 (type: int) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col3 (type: int) + outputColumnNames: _col0, _col4 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ + Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int) + value expressions: 'foo' (type: string), 'bar' (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' File Output Operator compressed: false table: @@ -919,7 +959,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 @@ -1053,8 +1093,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds= POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1070,7 +1111,6 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) Reduce Operator Tree: Select Operator @@ -1079,6 +1119,26 @@ STAGE PLANS: File Output Operator compressed: false table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ + Map-reduce partition columns: _col3 (type: string), _col4 (type: int) + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' + File Output Operator + compressed: false + table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1097,7 +1157,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 @@ -1127,8 +1187,9 @@ POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds= POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1144,7 +1205,6 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) sort order: + - Map-reduce partition columns: UDFToInteger(_col0) (type: int) value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) Reduce Operator Tree: Select Operator @@ -1153,6 +1213,26 @@ STAGE PLANS: File Output Operator compressed: false table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>) + sort order: ++++ + Map-reduce partition columns: _col3 (type: string), _col4 (type: int) + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' + File Output Operator + compressed: false + table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1171,7 +1251,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11
