Repository: hive Updated Branches: refs/heads/master 0d36e8247 -> 77f30d4f9
HIVE-11714: Turn off hybrid grace hash join for cross product join (Wei Zheng via Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/77f30d4f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/77f30d4f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/77f30d4f Branch: refs/heads/master Commit: 77f30d4f98a7d0b7729747136ed0e0926dd56e6b Parents: 0d36e82 Author: Gunther Hagleitner <[email protected]> Authored: Fri Oct 2 12:51:40 2015 -0700 Committer: Gunther Hagleitner <[email protected]> Committed: Fri Oct 2 12:55:47 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/exec/tez/HashTableLoader.java | 25 ++- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 4 + ql/src/test/queries/clientpositive/cross_join.q | 8 + .../results/clientpositive/cross_join.q.out | 196 +++++++++++++++++ .../clientpositive/spark/cross_join.q.out | 211 +++++++++++++++++++ .../results/clientpositive/tez/auto_join0.q.out | 1 - .../tez/auto_sortmerge_join_12.q.out | 1 - .../results/clientpositive/tez/cross_join.q.out | 187 ++++++++++++++++ .../tez/cross_product_check_2.q.out | 6 - .../tez/dynamic_partition_pruning.q.out | 1 - .../vectorized_dynamic_partition_pruning.q.out | 1 - 11 files changed, 626 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java index f7d165a..8a3647c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.lang.management.ManagementFactory; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; @@ -38,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -77,6 +79,12 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable Map<Integer, String> parentToInput = desc.getParentToInput(); Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts(); + boolean isCrossProduct = false; + List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next(); + if (joinExprs.size() == 0) { + isCrossProduct = true; + } + boolean useOptimizedTables = HiveConf.getBoolVar( hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); boolean useHybridGraceHashJoin = desc.isHybridHashJoin(); @@ -184,11 +192,18 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable } } - MapJoinTableContainer tableContainer = useOptimizedTables - ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount, - memory, desc.getParentDataSizes().get(pos), nwayConf) - : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0)) - : new HashMapWrapper(hconf, keyCount); + MapJoinTableContainer tableContainer; + if (useOptimizedTables) { + if (!useHybridGraceHashJoin || isCrossProduct) { + tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0); + } else { + tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, + desc.getParentDataSizes().get(pos), nwayConf); + } + } else { + tableContainer = new HashMapWrapper(hconf, keyCount); + } + LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName()); while (kvReader.next()) { http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index c357329..024849e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -634,6 +634,10 @@ public class ConvertJoinMapJoin implements NodeProcessor { joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink); mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)); + List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next(); + if (joinExprs.size() == 0) { // In case of cross join, we disable hybrid grace hash join + mapJoinOp.getConf().setHybridHashJoin(false); + } Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition); http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/queries/clientpositive/cross_join.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cross_join.q b/ql/src/test/queries/clientpositive/cross_join.q index 8eb949e..d30df50 100644 --- a/ql/src/test/queries/clientpositive/cross_join.q +++ b/ql/src/test/queries/clientpositive/cross_join.q @@ -5,3 +5,11 @@ explain select src.key from src join src src2; explain select src.key from src cross join src src2; -- appending condition is allowed explain select src.key from src cross join src src2 on src.key=src2.key; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=10000000; +set hive.mapjoin.hybridgrace.hashtable=true; + +explain select src.key from src join src src2; +explain select src.key from src cross join src src2; +explain select src.key from src cross join src src2 on src.key=src2.key; http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/cross_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cross_join.q.out b/ql/src/test/results/clientpositive/cross_join.q.out index a6dcd28..c543007 100644 --- a/ql/src/test/results/clientpositive/cross_join.q.out +++ b/ql/src/test/results/clientpositive/cross_join.q.out @@ -179,3 +179,199 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain select src.key from src join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain select src.key from src cross join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/spark/cross_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/cross_join.q.out b/ql/src/test/results/clientpositive/spark/cross_join.q.out index 5322d39..af49a79 100644 --- a/ql/src/test/results/clientpositive/spark/cross_join.q.out +++ b/ql/src/test/results/clientpositive/spark/cross_join.q.out @@ -203,3 +203,214 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain select src.key from src join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Spark HashTable Sink Operator + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain select src.key from src cross join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Spark HashTable Sink Operator + keys: + 0 + 1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/tez/auto_join0.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/auto_join0.q.out b/ql/src/test/results/clientpositive/tez/auto_join0.q.out index beaac17..1564b1c 100644 --- a/ql/src/test/results/clientpositive/tez/auto_join0.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_join0.q.out @@ -87,7 +87,6 @@ STAGE PLANS: input vertices: 1 Reducer 6 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out index e90af15..b83aa27 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out @@ -402,7 +402,6 @@ STAGE PLANS: 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/tez/cross_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/cross_join.q.out b/ql/src/test/results/clientpositive/tez/cross_join.q.out index 6d10c1e..431aa63 100644 --- a/ql/src/test/results/clientpositive/tez/cross_join.q.out +++ b/ql/src/test/results/clientpositive/tez/cross_join.q.out @@ -203,3 +203,190 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain select src.key from src join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain select src.key from src cross join src src2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out index 14dd820..f30caa1 100644 --- a/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out +++ b/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out @@ -63,7 +63,6 @@ STAGE PLANS: input vertices: 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true File Output Operator compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE @@ -160,7 +159,6 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -260,7 +258,6 @@ STAGE PLANS: input vertices: 1 Reducer 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -324,7 +321,6 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 11 Data size: 105 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) mode: hash @@ -358,7 +354,6 @@ STAGE PLANS: input vertices: 1 Reducer 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -488,7 +483,6 @@ STAGE PLANS: input vertices: 1 Reducer 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true File Output Operator compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out index e11f3e5..bbc2e16 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out @@ -4162,7 +4162,6 @@ STAGE PLANS: input vertices: 1 Reducer 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/77f30d4f/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index f0ddc5b..c779368 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -4215,7 +4215,6 @@ STAGE PLANS: input vertices: 1 Reducer 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash
