Repository: hive Updated Branches: refs/heads/master 156ed46a3 -> dd5121c7b
HIVE-14557: Nullpointer When both SkewJoin and Mapjoin Enabled (Nemon Lou reviewed by Yongzhi and Rui) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dd5121c7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dd5121c7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dd5121c7 Branch: refs/heads/master Commit: dd5121c7be9bac03e12dbd2db7e3b863cefe851e Parents: 156ed46 Author: Rui Li <li...@apache.org> Authored: Fri Nov 16 19:20:46 2018 +0800 Committer: Rui Li <li...@apache.org> Committed: Fri Nov 16 19:20:46 2018 +0800 ---------------------------------------------------------------------- .../optimizer/physical/PhysicalOptimizer.java | 6 +- .../runtime_skewjoin_mapjoin_spark.q.out | 529 +++---------------- 2 files changed, 77 insertions(+), 458 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/dd5121c7/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index d508d02..6c6908a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -46,9 +46,6 @@ public class PhysicalOptimizer { */ private void initialize(HiveConf hiveConf) { resolvers = new ArrayList<PhysicalPlanResolver>(); - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { - resolvers.add(new SkewJoinResolver()); - } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { resolvers.add(new CommonJoinResolver()); @@ -59,6 +56,9 @@ public class PhysicalOptimizer { resolvers.add(new SortMergeJoinResolver()); } } + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { + resolvers.add(new SkewJoinResolver()); + } resolvers.add(new MapJoinResolver()); if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { http://git-wip-us.apache.org/repos/asf/hive/blob/dd5121c7/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index ee8dbc2..3ecf14f 100644 --- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -35,39 +35,80 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-18 is a root stage , consists of Stage-22, Stage-23, Stage-24, Stage-1 - Stage-22 has a backup stage: Stage-1 - Stage-15 depends on stages: Stage-22 - Stage-8 depends on stages: Stage-1, Stage-11, Stage-12, Stage-13, Stage-15, Stage-16, Stage-17 , consists of Stage-20, Stage-21, Stage-2 - Stage-20 - Stage-6 depends on stages: Stage-20 - Stage-2 depends on stages: Stage-6, Stage-7 - Stage-21 - Stage-7 depends on stages: Stage-21 - Stage-23 has a backup stage: Stage-1 - Stage-16 depends on stages: Stage-23 - Stage-24 has a backup stage: Stage-1 - Stage-17 depends on stages: Stage-24 + Stage-14 is a root stage + Stage-10 depends on stages: Stage-14 + Stage-9 depends on stages: Stage-10 , consists of Stage-11, Stage-12, Stage-13, Stage-1 + Stage-11 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-11 + Stage-2 depends on stages: Stage-1, Stage-6, Stage-7, Stage-8 + Stage-12 has a backup stage: Stage-1 + Stage-7 depends on stages: Stage-12 + Stage-13 has a backup stage: Stage-1 + Stage-8 depends on stages: Stage-13 Stage-1 - Stage-29 is a root stage - Stage-19 depends on stages: Stage-29 - Stage-10 depends on stages: Stage-19 , consists of Stage-28, Stage-1 - Stage-28 - Stage-9 depends on stages: Stage-28 - Stage-14 depends on stages: Stage-9 , consists of Stage-25, Stage-26, Stage-27, Stage-1 - Stage-25 has a backup stage: Stage-1 - Stage-11 depends on stages: Stage-25 - Stage-26 has a backup stage: Stage-1 - Stage-12 depends on stages: Stage-26 - Stage-27 has a backup stage: Stage-1 - Stage-13 depends on stages: Stage-27 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-18 + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_2:$hdt$_3:t1_n94 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_2:$hdt$_3:t1_n94 + TableScan + alias: t1_n94 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-9 Conditional Operator - Stage: Stage-22 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -102,7 +143,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-15 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -140,61 +181,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-20 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - 2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - 2 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -218,59 +204,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-21 - Map Reduce Local Work - Alias -> Map Local Tables: - 0 - Fetch Operator - limit: -1 - 2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 0 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - 2 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-23 + Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -305,7 +239,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-16 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -343,7 +277,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-24 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -388,7 +322,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-17 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -462,7 +396,6 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Inner Join 0 to 2 - handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -480,321 +413,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-29 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:t1_n94 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:t1_n94 - TableScan - alias: t1_n94 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-19 - Map Reduce - Map Operator Tree: - TableScan - alias: src - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-10 - Conditional Operator - - Stage: Stage-28 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-14 - Conditional Operator - - Stage: Stage-25 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - $hdt$_1:src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_1:src2 - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-26 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - $hdt$_0:src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_0:src1 - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-27 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:src1 - Fetch Operator - limit: -1 - $hdt$_1:src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:src1 - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $hdt$_1:src2 - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 @@ -819,3 +437,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### +3