HIVE-10929: In Tez mode, dynamic partitioning query with union all fails at 
moveTask, Invalid partition key & values (Vikram Dixit K reviewed by Gunther 
Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4d592303
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4d592303
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4d592303

Branch: refs/heads/llap
Commit: 4d592303ad0f925c7d0815c003cedd45ab4f0064
Parents: d3863be
Author: Vaibhav Gumashta <[email protected]>
Authored: Sun Jun 7 11:38:10 2015 -0700
Committer: Vaibhav Gumashta <[email protected]>
Committed: Sun Jun 7 11:39:12 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |    1 +
 .../hadoop/hive/ql/parse/GenTezUtils.java       |    8 +
 .../tez_union_dynamic_partition.q               |   21 +
 .../clientpositive/tez/explainuser_2.q.out      |   63 +-
 .../tez/tez_union_dynamic_partition.q.out       |  148 +++
 .../results/clientpositive/tez/union4.q.out     |    4 +
 .../results/clientpositive/tez/union6.q.out     |    4 +
 .../tez/vector_leftsemi_mapjoin.q.out           | 1032 +++++++++---------
 .../tez/vector_multi_insert.q.out               |   20 +-
 .../clientpositive/tez/vector_outer_join1.q.out |   48 +-
 .../clientpositive/tez/vector_outer_join2.q.out |   22 +-
 .../clientpositive/tez/vector_outer_join3.q.out |   60 +-
 .../clientpositive/tez/vector_outer_join4.q.out |   48 +-
 13 files changed, 855 insertions(+), 624 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 47a1107..784b502 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -336,6 +336,7 @@ minitez.query.files=bucket_map_join_tez1.q,\
   tez_schema_evolution.q,\
   tez_union.q,\
   tez_union2.q,\
+  tez_union_dynamic_partition.q,\
   tez_union_view.q,\
   tez_union_decimal.q,\
   tez_union_group_by.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 0edfc5d..11c1df6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -238,6 +239,11 @@ public class GenTezUtils {
 
     Iterator<Operator<?>> it = newRoots.iterator();
     for (Operator<?> orig: roots) {
+      Set<FileSinkOperator> fsOpSet = OperatorUtils.findOperators(orig, FileSinkOperator.class);
+      for (FileSinkOperator fsOp : fsOpSet) {
+        context.fileSinkSet.remove(fsOp);
+      }
+
       Operator<?> newRoot = it.next();
 
       replacementMap.put(orig, newRoot);
@@ -301,6 +307,8 @@ public class GenTezUtils {
         linked.add(desc);
 
         desc.setDirName(new Path(path, ""+linked.size()));
+        desc.setLinkedFileSink(true);
+        desc.setParentDir(path);
         desc.setLinkedFileSinkDesc(linked);
       }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q 
b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
new file mode 100644
index 0000000..1c44a6c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_union_dynamic_partition.q
@@ -0,0 +1,21 @@
+create table dummy(i int);
+insert into table dummy values (1);
+select * from dummy;
+
+create table partunion1(id1 int) partitioned by (part1 string);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy 
+union all 
+select 2 as id1, '2014' as part1 from dummy ) temps;
+
+insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy 
+union all 
+select 2 as id1, '2014' as part1 from dummy ) temps;
+
+select * from partunion1;

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out 
b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 0340714..222e89e 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -3960,11 +3960,11 @@ Map 5 <- Union 2 (CONTAINS)
 Map 7 <- Map 6 (BROADCAST_EDGE)
 Map 8 <- Union 9 (CONTAINS)
 
-Stage-7
+Stage-15
    Stats-Aggr Operator
-      Stage-2
+      Stage-1
          Move Operator
-            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
             Stage-4
                Dependency Collection{}
                   Stage-3
@@ -4344,18 +4344,63 @@ Stage-7
                            Statistics:Num rows: 1705 Data size: 18038 Basic 
stats: COMPLETE Column stats: NONE
                            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
                             Please refer to the previous Select Operator 
[SEL_17]
-Stage-6
+Stage-14
    Stats-Aggr Operator
-      Stage-1
+      Stage-0
          Move Operator
-            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
              Please refer to the previous Stage-4
-Stage-5
+Stage-13
    Stats-Aggr Operator
-      Stage-0
+      Stage-2
          Move Operator
-            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
+            
table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c","input
 format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}
              Please refer to the previous Stage-4
+Stage-12
+   Stats-Aggr Operator
+       Please refer to the previous Stage-1
+Stage-19
+   Stats-Aggr Operator
+       Please refer to the previous Stage-2
+Stage-18
+   Stats-Aggr Operator
+       Please refer to the previous Stage-1
+Stage-17
+   Stats-Aggr Operator
+       Please refer to the previous Stage-0
+Stage-16
+   Stats-Aggr Operator
+       Please refer to the previous Stage-2
+Stage-20
+   Stats-Aggr Operator
+       Please refer to the previous Stage-0
+Stage-9
+   Stats-Aggr Operator
+       Please refer to the previous Stage-1
+Stage-22
+   Stats-Aggr Operator
+       Please refer to the previous Stage-2
+Stage-8
+   Stats-Aggr Operator
+       Please refer to the previous Stage-0
+Stage-21
+   Stats-Aggr Operator
+       Please refer to the previous Stage-1
+Stage-7
+   Stats-Aggr Operator
+       Please refer to the previous Stage-2
+Stage-6
+   Stats-Aggr Operator
+       Please refer to the previous Stage-1
+Stage-10
+   Stats-Aggr Operator
+       Please refer to the previous Stage-2
+Stage-11
+   Stats-Aggr Operator
+       Please refer to the previous Stage-0
+Stage-5
+   Stats-Aggr Operator
+       Please refer to the previous Stage-0
 PREHOOK: query: explain
 FROM
 ( 

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out 
b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
new file mode 100644
index 0000000..68a7531
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_union_dynamic_partition.q.out
@@ -0,0 +1,148 @@
+PREHOOK: query: create table dummy(i int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: create table dummy(i int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: insert into table dummy values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@dummy
+POSTHOOK: query: insert into table dummy values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@dummy
+POSTHOOK: Lineage: dummy.i EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+PREHOOK: query: select * from dummy
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dummy
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+#### A masked pattern was here ####
+1
+PREHOOK: query: create table partunion1(id1 int) partitioned by (part1 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partunion1
+POSTHOOK: query: create table partunion1(id1 int) partitioned by (part1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partunion1
+PREHOOK: query: explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy 
+union all 
+select 2 as id1, '2014' as part1 from dummy ) temps
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy 
+union all 
+select 2 as id1, '2014' as part1 from dummy ) temps
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Union 2 (CONTAINS)
+        Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dummy
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 1 (type: int), '2014' (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 92 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 184 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.partunion1
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: dummy
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 2 (type: int), '2014' (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 92 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 184 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.partunion1
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            part1 
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.partunion1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+  Stage: Stage-4
+    Stats-Aggr Operator
+
+PREHOOK: query: insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy 
+union all 
+select 2 as id1, '2014' as part1 from dummy ) temps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@partunion1
+POSTHOOK: query: insert into table partunion1 partition(part1)
+select temps.* from (
+select 1 as id1, '2014' as part1 from dummy 
+union all 
+select 2 as id1, '2014' as part1 from dummy ) temps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@partunion1@part1=2014
+POSTHOOK: Lineage: partunion1 PARTITION(part1=2014).id1 EXPRESSION []
+PREHOOK: query: select * from partunion1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partunion1
+PREHOOK: Input: default@partunion1@part1=2014
+#### A masked pattern was here ####
+POSTHOOK: query: select * from partunion1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partunion1
+POSTHOOK: Input: default@partunion1@part1=2014
+#### A masked pattern was here ####
+1      2014
+2      2014

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/union4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union4.q.out 
b/ql/src/test/results/clientpositive/tez/union4.q.out
index 7cd6099..9d079ad 100644
--- a/ql/src/test/results/clientpositive/tez/union4.q.out
+++ b/ql/src/test/results/clientpositive/tez/union4.q.out
@@ -33,6 +33,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -139,6 +140,9 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-4
+    Stats-Aggr Operator
+
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as 
value from src s1
                                         UNION  ALL  

http://git-wip-us.apache.org/repos/asf/hive/blob/4d592303/ql/src/test/results/clientpositive/tez/union6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out 
b/ql/src/test/results/clientpositive/tez/union6.q.out
index 77f1d0e..4647278 100644
--- a/ql/src/test/results/clientpositive/tez/union6.q.out
+++ b/ql/src/test/results/clientpositive/tez/union6.q.out
@@ -31,6 +31,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -111,6 +112,9 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-4
+    Stats-Aggr Operator
+
 PREHOOK: query: insert overwrite table tmptable
 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) 
as string) as value from src s1
                                       UNION  ALL  

Reply via email to