HIVE-10929: In Tez mode, dynamic partitioning query with union all fails at moveTask, Invalid partition key & values (Vikram Dixit K, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4d592303 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4d592303 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4d592303 Branch: refs/heads/llap Commit: 4d592303ad0f925c7d0815c003cedd45ab4f0064 Parents: d3863be Author: Vaibhav Gumashta <[email protected]> Authored: Sun Jun 7 11:38:10 2015 -0700 Committer: Vaibhav Gumashta <[email protected]> Committed: Sun Jun 7 11:39:12 2015 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/parse/GenTezUtils.java | 8 + .../tez_union_dynamic_partition.q | 21 + .../clientpositive/tez/explainuser_2.q.out | 63 +- .../tez/tez_union_dynamic_partition.q.out | 148 +++ .../results/clientpositive/tez/union4.q.out | 4 + .../results/clientpositive/tez/union6.q.out | 4 + .../tez/vector_leftsemi_mapjoin.q.out | 1032 +++++++++--------- .../tez/vector_multi_insert.q.out | 20 +- .../clientpositive/tez/vector_outer_join1.q.out | 48 +- .../clientpositive/tez/vector_outer_join2.q.out | 22 +- .../clientpositive/tez/vector_outer_join3.q.out | 60 +- .../clientpositive/tez/vector_outer_join4.q.out | 48 +- 13 files changed, 855 insertions(+), 624 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 47a1107..784b502 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -336,6 +336,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_schema_evolution.q,\ tez_union.q,\ tez_union2.q,\ + 
tez_union_dynamic_partition.q,\ tez_union_view.q,\ tez_union_decimal.q,\ tez_union_group_by.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 0edfc5d..11c1df6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; @@ -238,6 +239,11 @@ public class GenTezUtils { Iterator<Operator<?>> it = newRoots.iterator(); for (Operator<?> orig: roots) { + Set<FileSinkOperator> fsOpSet = OperatorUtils.findOperators(orig, FileSinkOperator.class); + for (FileSinkOperator fsOp : fsOpSet) { + context.fileSinkSet.remove(fsOp); + } + Operator<?> newRoot = it.next(); replacementMap.put(orig, newRoot); @@ -301,6 +307,8 @@ public class GenTezUtils { linked.add(desc); desc.setDirName(new Path(path, ""+linked.size())); + desc.setLinkedFileSink(true); + desc.setParentDir(path); desc.setLinkedFileSinkDesc(linked); } http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q new file mode 100644 index 
0000000..1c44a6c --- /dev/null +++ b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q @@ -0,0 +1,21 @@ +create table dummy(i int); +insert into table dummy values (1); +select * from dummy; + +create table partunion1(id1 int) partitioned by (part1 string); + +set hive.exec.dynamic.partition.mode=nonstrict; + +explain insert into table partunion1 partition(part1) +select temps.* from ( +select 1 as id1, '2014' as part1 from dummy +union all +select 2 as id1, '2014' as part1 from dummy ) temps; + +insert into table partunion1 partition(part1) +select temps.* from ( +select 1 as id1, '2014' as part1 from dummy +union all +select 2 as id1, '2014' as part1 from dummy ) temps; + +select * from partunion1; http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/explainuser_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out index 0340714..222e89e 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -3960,11 +3960,11 @@ Map 5 <- Union 2 (CONTAINS) Map 7 <- Map 6 (BROADCAST_EDGE) Map 8 <- Union 9 (CONTAINS) -Stage-7 +Stage-15 Stats-Aggr Operator - Stage-2 + Stage-1 Move Operator - table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Stage-4 Dependency Collection{} Stage-3 @@ -4344,18 +4344,63 @@ Stage-7 Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: 
NONE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Please refer to the previous Select Operator [SEL_17] -Stage-6 +Stage-14 Stats-Aggr Operator - Stage-1 + Stage-0 Move Operator - table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Please refer to the previous Stage-4 -Stage-5 +Stage-13 Stats-Aggr Operator - Stage-0 + Stage-2 Move Operator - table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} + table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} Please refer to the previous Stage-4 +Stage-12 + Stats-Aggr Operator + Please refer to the previous Stage-1 +Stage-19 + Stats-Aggr Operator + Please refer to the previous Stage-2 +Stage-18 + Stats-Aggr Operator + Please refer to the previous Stage-1 +Stage-17 + Stats-Aggr Operator + Please refer to the previous Stage-0 +Stage-16 + Stats-Aggr Operator + Please refer to the previous Stage-2 +Stage-20 + Stats-Aggr Operator + Please refer to the previous Stage-0 +Stage-9 + Stats-Aggr Operator + Please refer to the previous Stage-1 +Stage-22 + Stats-Aggr Operator + Please refer to the previous Stage-2 +Stage-8 + 
Stats-Aggr Operator + Please refer to the previous Stage-0 +Stage-21 + Stats-Aggr Operator + Please refer to the previous Stage-1 +Stage-7 + Stats-Aggr Operator + Please refer to the previous Stage-2 +Stage-6 + Stats-Aggr Operator + Please refer to the previous Stage-1 +Stage-10 + Stats-Aggr Operator + Please refer to the previous Stage-2 +Stage-11 + Stats-Aggr Operator + Please refer to the previous Stage-0 +Stage-5 + Stats-Aggr Operator + Please refer to the previous Stage-0 PREHOOK: query: explain FROM ( http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out new file mode 100644 index 0000000..68a7531 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out @@ -0,0 +1,148 @@ +PREHOOK: query: create table dummy(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: create table dummy(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +PREHOOK: query: insert into table dummy values (1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@dummy +POSTHOOK: query: insert into table dummy values (1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@dummy +POSTHOOK: Lineage: dummy.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from dummy +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +#### A masked pattern was here #### +POSTHOOK: query: select * from dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +#### A 
masked pattern was here #### +1 +PREHOOK: query: create table partunion1(id1 int) partitioned by (part1 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partunion1 +POSTHOOK: query: create table partunion1(id1 int) partitioned by (part1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partunion1 +PREHOOK: query: explain insert into table partunion1 partition(part1) +select temps.* from ( +select 1 as id1, '2014' as part1 from dummy +union all +select 2 as id1, '2014' as part1 from dummy ) temps +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table partunion1 partition(part1) +select temps.* from ( +select 1 as id1, '2014' as part1 from dummy +union all +select 2 as id1, '2014' as part1 from dummy ) temps +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dummy + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), '2014' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partunion1 + Map 3 + Map Operator Tree: + TableScan + alias: dummy + Statistics: Num rows: 1 Data size: 1 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2 (type: int), '2014' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partunion1 + Union 2 + Vertex: Union 2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + part1 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.partunion1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Stats-Aggr Operator + +PREHOOK: query: insert into table partunion1 partition(part1) +select temps.* from ( +select 1 as id1, '2014' as part1 from dummy +union all +select 2 as id1, '2014' as part1 from dummy ) temps +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Output: default@partunion1 +POSTHOOK: query: insert into table partunion1 partition(part1) +select temps.* from ( +select 1 as id1, '2014' as part1 from dummy +union all +select 2 as id1, '2014' as part1 from dummy ) temps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Output: default@partunion1@part1=2014 +POSTHOOK: Lineage: partunion1 PARTITION(part1=2014).id1 EXPRESSION [] +PREHOOK: query: select * from partunion1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partunion1 +PREHOOK: Input: default@partunion1@part1=2014 +#### A masked pattern was here #### +POSTHOOK: query: select * from partunion1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partunion1 +POSTHOOK: 
Input: default@partunion1@part1=2014 +#### A masked pattern was here #### +1 2014 +2 2014 http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/union4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out b/ql/src/test/results/clientpositive/tez/union4.q.out index 7cd6099..9d079ad 100644 --- a/ql/src/test/results/clientpositive/tez/union4.q.out +++ b/ql/src/test/results/clientpositive/tez/union4.q.out @@ -33,6 +33,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -139,6 +140,9 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/union6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out index 77f1d0e..4647278 100644 --- a/ql/src/test/results/clientpositive/tez/union6.q.out +++ b/ql/src/test/results/clientpositive/tez/union6.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -111,6 +112,9 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Stats-Aggr Operator + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL
