HIVE-18201 : Disable XPROD_EDGE for sq_count_check() created for scalar subqueries (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4d436953 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4d436953 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4d436953 Branch: refs/heads/master Commit: 4d436953e6be1302a0867aa16a8c5ecd2804eed7 Parents: e9e1f8f Author: Ashutosh Chauhan <[email protected]> Authored: Tue Dec 12 15:15:00 2017 -0800 Committer: Ashutosh Chauhan <[email protected]> Committed: Tue Jul 31 21:36:09 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../hive/ql/optimizer/ConvertJoinMapJoin.java | 25 +- .../test/queries/clientpositive/perf/query6.q | 2 + .../queries/clientpositive/subquery_in_having.q | 4 +- .../clientpositive/llap/auto_join_filters.q.out | 4 +- .../clientpositive/llap/auto_join_nulls.q.out | 2 +- .../results/clientpositive/llap/mapjoin2.q.out | 2 +- .../clientpositive/llap/mapjoin_hint.q.out | 62 ++- .../llap/subquery_in_having.q.out | 427 ++++++++----------- .../llap/tez_fixed_bucket_pruning.q.out | 252 +++++------ .../llap/vector_complex_all.q.out | 94 ++-- .../llap/vector_groupby_mapjoin.q.out | 113 ++--- .../llap/vector_join_filters.q.out | 2 +- .../llap/vectorized_multi_output_select.q.out | 58 ++- .../clientpositive/perf/tez/query6.q.out | 189 ++++---- 15 files changed, 592 insertions(+), 646 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cce908f..093b4a7 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2027,6 +2027,8 @@ public class HiveConf extends Configuration { "However, if it is on, and the predicted number of entries in hashtable for a given join \n" + "input is larger than this number, the join will not be converted to a mapjoin. \n" + "The value \"-1\" means no limit."), + XPRODSMALLTABLEROWSTHRESHOLD("hive.xprod.mapjoin.small.table.rows", 1,"Maximum number of rows on build side" + + " of map join before it switches over to cross product edge"), HIVECONVERTJOINMAXSHUFFLESIZE("hive.auto.convert.join.shuffle.max.size", 10000000000L, "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + "However, if it is on, and the predicted size of the larger input for a given join is greater \n" + http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 011dadf..4145baf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -113,14 +113,6 @@ public class ConvertJoinMapJoin implements NodeProcessor { MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf, llapInfo); joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo); - // not use map join in case of cross product - boolean cartesianProductEdgeEnabled = - HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); - if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) { - fallbackToMergeJoin(joinOp, context); - return null; - } - TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf); boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin(); @@ -988,6 +980,23 @@ public class ConvertJoinMapJoin implements NodeProcessor { return -1; } + // only allow cross product in map joins if build side is 'small' + boolean cartesianProductEdgeEnabled = + HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) { + for (int i = 0 ; i < joinOp.getParentOperators().size(); i ++) { + if (i != bigTablePosition) { + Statistics parentStats = joinOp.getParentOperators().get(i).getStatistics(); + if (parentStats.getNumRows() > + HiveConf.getIntVar(context.conf, HiveConf.ConfVars.XPRODSMALLTABLEROWSTHRESHOLD)) { + // if any of smaller side is estimated to generate more than + // threshold rows we would disable mapjoin + return -1; + } + } + } + } + // We store the total memory that this MapJoin is going to use, // which is calculated as totalSize/buckets, with totalSize // equal to sum of small tables size. http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/queries/clientpositive/perf/query6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/query6.q b/ql/src/test/queries/clientpositive/perf/query6.q index d45045d..aabce52 100644 --- a/ql/src/test/queries/clientpositive/perf/query6.q +++ b/ql/src/test/queries/clientpositive/perf/query6.q @@ -1,3 +1,5 @@ +set hive.auto.convert.join=true; +set hive.tez.cartesian-product.enabled=true; set hive.mapred.mode=nonstrict; -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 explain http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/queries/clientpositive/subquery_in_having.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q index ec6981b..8b6d1a7 100644 --- a/ql/src/test/queries/clientpositive/subquery_in_having.q +++ b/ql/src/test/queries/clientpositive/subquery_in_having.q @@ -1,5 +1,6 @@ --! qt:dataset:src set hive.mapred.mode=nonstrict; +set hive.optimize.shared.work.extended=false; -- SORT_QUERY_RESULTS -- data setup @@ -154,4 +155,5 @@ group by key, value having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' and s1.value <> b.value group by s1.key ); DROP TABLE src_null_n4; -DROP TABLE part_subq; \ No newline at end of file +DROP TABLE part_subq; +reset hive.optimize.shared.work.extended; http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out index 7a271fc..a639792 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1_n5 -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in/000001_0' into tabl POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2_n0 -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index c7bb127..194fc5d 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1_n2 -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a JOIN myinput1_n2 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n2 http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/mapjoin2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out index 4638fce..872f918 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out @@ -57,7 +57,7 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### false false true true true true false false -Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out index 5cccce9..3c6270a 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out @@ -527,7 +527,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) @@ -541,8 +541,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -558,13 +558,33 @@ STAGE PLANS: expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + input vertices: + 1 Reducer 3 + 2 Map 2 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: part_null_n1 @@ -589,31 +609,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 - 1 - 2 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out index 20428e1..af8e23a 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -1570,10 +1570,9 @@ POSTHOOK: Output: default@src_null_n4 POSTHOOK: Lineage: src_null_n4.key SCRIPT [] POSTHOOK: Lineage: src_null_n4.value EXPRESSION [] Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[134][bigTable=?] in task 'Map 1' is a cross product -Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product +Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product PREHOOK: query: explain select key, value, count(*) from src_null_n4 b @@ -1597,18 +1596,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 12 <- Map 10 (SIMPLE_EDGE) - Reducer 13 <- Map 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Map 1 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE) + Map 6 <- Reducer 5 (BROADCAST_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Map 3 (SIMPLE_EDGE) + Reducer 7 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) - Reducer 9 <- Map 1 (SIMPLE_EDGE) + Reducer 9 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1628,7 +1625,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 11 + 1 Reducer 4 residual filter predicates: {(_col2 <> _col1)} Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1661,38 +1658,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Reducer 12 - residual filter predicates: {(_col2 <> _col1)} - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE @@ -1704,6 +1669,39 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: src_null_n4 + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1716,7 +1714,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 13 + 1 Reducer 5 residual filter predicates: {(_col2 <> _col1)} Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1736,87 +1734,61 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: src_null_n4 - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 11 + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: boolean) - Reducer 12 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: boolean) - Reducer 13 + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: boolean) + key expressions: _col1 (type: string), _col0 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1838,7 +1810,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5 input vertices: - 1 Reducer 4 + 1 Reducer 8 Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1848,7 +1820,7 @@ STAGE PLANS: 1 _col1 (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 input vertices: - 1 Reducer 8 + 1 Reducer 11 Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) @@ -1864,63 +1836,42 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col2) - keys: _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1934,39 +1885,34 @@ STAGE PLANS: mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + residual filter predicates: {(_col1 <> _col2)} Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Reducer 7 + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1977,45 +1923,22 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col2 + outputColumnNames: _col1, _col2 Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Group By Operator + aggregations: count(), count(_col2) + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col2 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 9 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -2031,10 +1954,33 @@ STAGE PLANS: mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2043,10 +1989,9 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[134][bigTable=?] in task 'Map 1' is a cross product -Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product +Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product PREHOOK: query: select key, value, count(*) from src_null_n4 b where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value) http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 2c38d8c..98b2013 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@l3_monthly_dw_dimplan POSTHOOK: Output: default@l3_monthly_dw_dimplan #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -478,8 +478,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -497,13 +497,59 @@ STAGE PLANS: expressions: plan_detail_object_id (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 3 + Position of Big Table: 0 Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: bigint) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Estimated key counts: Map 4 => 90170 + keys: + 0 _col2 (type: bigint), _col0 (type: bigint) + 1 _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col2, _col5 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Estimated key counts: Map 5 => 1 + keys: + 0 _col0 (type: bigint) + 1 _col1 (type: bigint) + outputColumnNames: _col5, _col7 + input vertices: + 1 Map 5 + Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: bigint), _col7 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + null sort order: aa + sort order: ++ + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 5 + TopN Hash Memory Usage: 0.1 + auto parallelism: false Execution mode: vectorized, llap LLAP IO: all inputs Path -> Alias: @@ -559,7 +605,7 @@ STAGE PLANS: name: default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw] - Map 4 + Map 3 Map Operator Tree: TableScan alias: snap @@ -631,7 +677,7 @@ STAGE PLANS: name: default.l3_clarity__l3_snap_number_2018022300104 Truncated Path -> Alias: /l3_clarity__l3_snap_number_2018022300104 [snap] - Map 5 + Map 4 Map Operator Tree: TableScan alias: s1 @@ -712,7 +758,7 @@ STAGE PLANS: name: default.l3_monthly_dw_dimplan Truncated Path -> Alias: /l3_monthly_dw_dimplan [s1] - Map 6 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -792,60 +838,6 @@ STAGE PLANS: Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2] Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Estimated key counts: Map 5 => 90170 - keys: - 0 _col2 (type: bigint), _col0 (type: bigint) - 1 _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col2, _col5 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: bigint), _col5 (type: bigint) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Estimated key counts: Map 6 => 1 - keys: - 0 _col0 (type: bigint) - 1 _col1 (type: bigint) - outputColumnNames: _col5, _col7 - input vertices: - 1 Map 6 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: bigint), _col7 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: bigint) - null sort order: aa - sort order: ++ - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 - TopN Hash Memory Usage: 0.1 - auto parallelism: false - Reducer 3 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: @@ -889,7 +881,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 @@ -931,7 +923,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan 7147200 NULL 27114 7147200 NULL 27114 7147200 NULL 27114 -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -985,8 +977,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1004,13 +996,59 @@ STAGE PLANS: expressions: plan_detail_object_id (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 3 + Position of Big Table: 0 Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: bigint) - auto parallelism: false + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Estimated key counts: Map 4 => 90170 + keys: + 0 _col2 (type: bigint), _col0 (type: bigint) + 1 _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col2, _col5 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Estimated key counts: Map 5 => 1 + keys: + 0 _col0 (type: bigint) + 1 _col1 (type: bigint) + outputColumnNames: _col5, _col7 + input vertices: + 1 Map 5 + Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: bigint), _col7 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + null sort order: aa + sort order: ++ + Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 5 + TopN Hash Memory Usage: 0.1 + auto parallelism: false Execution mode: vectorized, llap LLAP IO: all inputs Path -> Alias: @@ -1066,7 +1104,7 @@ STAGE PLANS: name: default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw] - Map 4 + Map 3 Map Operator Tree: TableScan alias: snap @@ -1138,7 +1176,7 @@ STAGE PLANS: name: default.l3_clarity__l3_snap_number_2018022300104 Truncated Path -> Alias: /l3_clarity__l3_snap_number_2018022300104 [snap] - Map 5 + Map 4 Map Operator Tree: TableScan alias: s1 @@ -1220,7 +1258,7 @@ STAGE PLANS: name: default.l3_monthly_dw_dimplan Truncated Path -> Alias: /l3_monthly_dw_dimplan [s1] - Map 6 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -1300,60 +1338,6 @@ STAGE PLANS: Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2] Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Estimated key counts: Map 5 => 90170 - keys: - 0 _col2 (type: bigint), _col0 (type: bigint) - 1 _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col2, _col5 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: bigint), _col5 (type: bigint) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Estimated key counts: Map 6 => 1 - keys: - 0 _col0 (type: bigint) - 1 _col1 (type: bigint) - outputColumnNames: _col5, _col7 - input vertices: - 1 Map 6 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col5 (type: bigint), _col7 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: bigint) - null sort order: aa - sort order: ++ - Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 - TopN Hash Memory Usage: 0.1 - auto parallelism: false - Reducer 3 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: @@ -1397,7 +1381,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index d5ea64f..4e1698d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -642,7 +642,7 @@ b str two line1 four line2 six line3 -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 @@ -667,7 +667,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE) + Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -712,7 +712,7 @@ STAGE PLANS: dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string> partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 2 Map Operator Tree: TableScan alias: spam2 @@ -751,7 +751,7 @@ STAGE PLANS: dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string> partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: spam1 @@ -790,7 +790,7 @@ STAGE PLANS: dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string> partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: src1 @@ -806,16 +806,47 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 + 1 + 2 + 3 + Map Join Vectorization: + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col6 + input vertices: + 0 Map 1 + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: map<string,string>), _col2 (type: array<string>), _col3 (type: struct<a:string,b:string>), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex_n0 Execution mode: vectorized, llap Map Vectorization: enabled: true @@ -823,7 +854,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -831,34 +862,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - outputColumnNames: _col0, _col1, _col2, _col3, _col6 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: map<string,string>), _col2 (type: array<string>), _col3 (type: struct<a:string,b:string>), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_create_complex_n0 + scratchColumnTypeNames: [string, map<string,string>, array<string>, struct<a:string,b:string>] Stage: Stage-2 Dependency Collection @@ -877,7 +881,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/4d436953/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 10abe77..6443678 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain vectorization expression select * from src @@ -26,10 +26,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -47,14 +47,58 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 0), FilterExprAndExpr(children: SelectColumnIsNull(col 5:boolean), SelectColumnIsNotNull(col 0:string), FilterLongColGreaterEqualLongColumn(col 4:bigint, col 3:bigint))) + predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) + Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -66,7 +110,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -134,39 +178,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 - input vertices: - 1 Reducer 6 - Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -193,7 +204,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -222,7 +233,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -269,7 +280,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from src where not key in @@ -298,7 +309,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from orcsrc where not key in @@ -315,7 +326,7 @@ order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from orcsrc where not key in
